649 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			649 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| |
 | |
| |	round.sa 3.4 7/29/91
 | |
| |
 | |
| |	handle rounding and normalization tasks
 | |
| |
 | |
| |
 | |
| |
 | |
| |		Copyright (C) Motorola, Inc. 1990
 | |
| |			All Rights Reserved
 | |
| |
 | |
| |       For details on the license for this file, please see the
 | |
| |       file, README, in this same directory.
 | |
| 
 | |
| |ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package
 | |
| 
 | |
| 	|section	8
 | |
| 
 | |
| #include "fpsp.h"
 | |
| 
 | |
| |
 | |
| |	round --- round result according to precision/mode
 | |
| |
 | |
| |	a0 points to the input operand in the internal extended format
 | |
| |	d1(high word) contains rounding precision:
 | |
| |		ext = $0000xxxx
 | |
| |		sgl = $0001xxxx
 | |
| |		dbl = $0002xxxx
 | |
| |	d1(low word) contains rounding mode:
 | |
| |		RN  = $xxxx0000
 | |
| |		RZ  = $xxxx0001
 | |
| |		RM  = $xxxx0002
 | |
| |		RP  = $xxxx0003
 | |
| |	d0{31:29} contains the g,r,s bits (extended)
 | |
| |
 | |
| |	On return the value pointed to by a0 is correctly rounded,
 | |
| |	a0 is preserved and the g-r-s bits in d0 are cleared.
 | |
| |	The result is not typed - the tag field is invalid.  The
 | |
| |	result is still in the internal extended format.
 | |
| |
 | |
| |	The INEX bit of USER_FPSR will be set if the rounded result was
 | |
| |	inexact (i.e. if any of the g-r-s bits were set).
 | |
| |
 | |
| 
 | |
| 	.global	round
 | |
| round:
 | |
| | If g=r=s=0 then result is exact and round is done, else set
 | |
| | the inex flag in status reg and continue.
 | |
| |
 | |
| 	bsrs	ext_grs			|this subroutine looks at the
 | |
| |					;rounding precision and sets
 | |
| |					;the appropriate g-r-s bits.
 | |
| 	tstl	%d0			|if grs are all zero the result is
 | |
| 	bne	rnd_cont		|exact: fall through and only force
 | |
| |					;the lower bits to zero for size
 | |
| 
 | |
| 	swap	%d1			|set up d1.w for round prec.
 | |
| 	bra	truncate		|exact result, truncate for prec.
 | |
| 
 | |
| rnd_cont:
 | |
| |
 | |
| | Use rounding mode as an index into a jump table for these modes.
 | |
| |
 | |
| 	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
 | |
| 	lea	mode_tab,%a1
 | |
| 	movel	(%a1,%d1.w*4),%a1	|a1 = handler for the mode in d1.w
 | |
| 	jmp	(%a1)
 | |
| |
 | |
| | Jump table indexed by rounding mode in d1.w.  All following assumes
 | |
| | grs != 0.
 | |
| |
 | |
| mode_tab:
 | |
| 	.long	rnd_near		|index 0
 | |
| 	.long	rnd_zero		|index 1
 | |
| 	.long	rnd_mnus		|index 2
 | |
| 	.long	rnd_plus		|index 3
 | |
| |
 | |
| |	ROUND PLUS INFINITY
 | |
| |
 | |
| |	If sign of fp number = 0 (positive), then add 1 to l.
 | |
| |
 | |
| rnd_plus:
 | |
| 	swap	%d1			|set up d1 for round prec.
 | |
| 	tstb	LOCAL_SGN(%a0)		|check for sign
 | |
| 	bmi	truncate		|if negative then truncate
 | |
| 	movel	#0xffffffff,%d0		|force g,r,s to be all f's
 | |
| |					;(non-zero d0 makes the add
 | |
| |					;routines keep the l-bit)
 | |
| 	lea	add_to_l,%a1
 | |
| 	movel	(%a1,%d1.w*4),%a1	|pick add routine by round prec.
 | |
| 	jmp	(%a1)
 | |
| |
 | |
| |	ROUND MINUS INFINITY
 | |
| |
 | |
| |	If sign of fp number = 1 (negative), then add 1 to l.
 | |
| |
 | |
| rnd_mnus:
 | |
| 	swap	%d1			|set up d1 for round prec.
 | |
| 	tstb	LOCAL_SGN(%a0)		|check for sign
 | |
| 	bpl	truncate		|if positive then truncate
 | |
| 	movel	#0xffffffff,%d0		|force g,r,s to be all f's
 | |
| |					;(non-zero d0 makes the add
 | |
| |					;routines keep the l-bit)
 | |
| 	lea	add_to_l,%a1
 | |
| 	movel	(%a1,%d1.w*4),%a1	|pick add routine by round prec.
 | |
| 	jmp	(%a1)
 | |
| |
 | |
| |	ROUND ZERO
 | |
| |
 | |
| |	Always truncate.
 | |
| rnd_zero:
 | |
| 	swap	%d1			|set up d1 for round prec.
 | |
| 	bra	truncate		|never add to l, only drop bits
 | |
| |
 | |
| |
 | |
| |	ROUND NEAREST
 | |
| |
 | |
| |	If (g=1), then add 1 to l and if (r=s=0), then clear l
 | |
| |	Note that this will round to even in case of a tie.
 | |
| |
 | |
| rnd_near:
 | |
| 	swap	%d1			|set up d1 for round prec.
 | |
| 	asll	#1,%d0			|shift g-bit to c-bit
 | |
| 	bcc	truncate		|g=0: just truncate
 | |
| |					;g=1: add 1 to l; d0 now holds
 | |
| |					;r,s which the add routines test
 | |
| |					;to clear l on a tie
 | |
| 	lea	add_to_l,%a1
 | |
| 	movel	(%a1,%d1.w*4),%a1	|pick add routine by round prec.
 | |
| 	jmp	(%a1)
 | |
| 
 | |
| |
 | |
| |	ext_grs --- extract guard, round and sticky bits
 | |
| |
 | |
| | Input:	d1 =		PREC:ROUND
 | |
| | Output:	d0{31:29}=	guard, round, sticky
 | |
| |
 | |
| | The ext_grs routine extracts the guard/round/sticky bits according to the
 | |
| | selected rounding precision. It is called by the round subroutine
 | |
| | only.  All registers except d0 are kept intact. d0 becomes an
 | |
| | updated guard,round,sticky in d0{31:29}
 | |
| |
 | |
| | Notes: the ext_grs uses the round PREC, and therefore has to swap d1
 | |
| |	 prior to usage, and needs to restore d1 to original.
 | |
| |
 | |
| ext_grs:
 | |
| 	swap	%d1			|have d1.w point to round precision
 | |
| 	cmpiw	#0,%d1			|extended precision?
 | |
| 	bnes	sgl_or_dbl		|no, rebuild g,r,s for sgl/dbl
 | |
| 	bras	end_ext_grs		|yes, d0 is returned unchanged
 | |
| 
 | |
| sgl_or_dbl:
 | |
| 	moveml	%d2/%d3,-(%a7)		|make some temp registers
 | |
| 	cmpiw	#1,%d1			|single precision?
 | |
| 	bnes	grs_dbl			|any other value is treated as dbl
 | |
| grs_sgl:
 | |
| 	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|sgl prec. g-r are 2 bits right
 | |
| 	movel	#30,%d2			|of the sgl prec. limits
 | |
| 	lsll	%d2,%d3			|shift g-r bits to MSB of d3
 | |
| 	movel	LOCAL_HI(%a0),%d2		|get word 2 for s-bit test
 | |
| 	andil	#0x0000003f,%d2		|s bit is the or of all other
 | |
| 	bnes	st_stky			|bits to the right of g-r
 | |
| 	tstl	LOCAL_LO(%a0)		|test lower mantissa
 | |
| 	bnes	st_stky			|if any are set, set sticky
 | |
| 	tstl	%d0			|test original g,r,s
 | |
| 	bnes	st_stky			|if any are set, set sticky
 | |
| 	bras	end_sd			|if words 3 and 4 are clr, exit
 | |
| grs_dbl:
 | |
| 	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|dbl-prec. g-r are 2 bits right
 | |
| 	movel	#30,%d2			|of the dbl prec. limits
 | |
| 	lsll	%d2,%d3			|shift g-r bits to the MSB of d3
 | |
| 	movel	LOCAL_LO(%a0),%d2		|get lower mantissa  for s-bit test
 | |
| 	andil	#0x000001ff,%d2		|s bit is the or-ing of all
 | |
| 	bnes	st_stky			|other bits to the right of g-r
 | |
| 	tstl	%d0			|test word original g,r,s
 | |
| 	bnes	st_stky			|if any are set, set sticky
 | |
| 	bras	end_sd			|if clear, exit
 | |
| st_stky:
 | |
| 	bset	#rnd_stky_bit,%d3	|set sticky in the new g,r,s
 | |
| end_sd:
 | |
| 	movel	%d3,%d0			|return grs to d0
 | |
| 	moveml	(%a7)+,%d2/%d3		|restore scratch registers
 | |
| end_ext_grs:
 | |
| 	swap	%d1			|restore d1 to original
 | |
| 	rts
 | |
| 
 | |
| |*******************  Local Equates
 | |
| 	.set	ad_1_sgl,0x00000100	|  constant to add 1 to l-bit in sgl prec
 | |
| 	.set	ad_1_dbl,0x00000800	|  constant to add 1 to l-bit in dbl prec
 | |
| 
 | |
| 
 | |
| |Jump table for adding 1 to the l-bit indexed by rnd prec
 | |
| 
 | |
| add_to_l:
 | |
| 	.long	add_ext			|index 0
 | |
| 	.long	add_sgl			|index 1
 | |
| 	.long	add_dbl			|index 2
 | |
| 	.long	add_dbl			|index 3 (same handler as 2)
 | |
| |
 | |
| |	ADD SINGLE
 | |
| |
 | |
| add_sgl:
 | |
| 	addl	#ad_1_sgl,LOCAL_HI(%a0)
 | |
| 	bccs	scc_clr			|no mantissa overflow
 | |
| 	roxrw  LOCAL_HI(%a0)		|shift v-bit back in
 | |
| 	roxrw  LOCAL_HI+2(%a0)		|shift v-bit back in
 | |
| 	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
 | |
| scc_clr:
 | |
| 	tstl	%d0			|test for rs = 0
 | |
| 	bnes	sgl_done
 | |
| 	andiw  #0xfe00,LOCAL_HI+2(%a0)	|clear the l-bit
 | |
| sgl_done:
 | |
| |					;also entered via the trunct table
 | |
| 	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
 | |
| 	clrl	LOCAL_LO(%a0)		|clear ls mantissa
 | |
| 	rts
 | |
| 
 | |
| |
 | |
| |	ADD EXTENDED
 | |
| |
 | |
| add_ext:
 | |
| 	addql  #1,LOCAL_LO(%a0)		|add 1 to l-bit
 | |
| 	bccs	xcc_clr			|test for carry out
 | |
| 	addql  #1,LOCAL_HI(%a0)		|propagate carry
 | |
| 	bccs	xcc_clr
 | |
| 	roxrw  LOCAL_HI(%a0)		|mant is 0 so restore v-bit
 | |
| 	roxrw  LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
 | |
| 	roxrw	LOCAL_LO(%a0)		|continue the rotate through
 | |
| 	roxrw	LOCAL_LO+2(%a0)		|both words of the ls mantissa
 | |
| 	addw	#0x1,LOCAL_EX(%a0)	|and inc exp
 | |
| xcc_clr:
 | |
| 	tstl	%d0			|test rs = 0
 | |
| 	bnes	add_ext_done
 | |
| 	andib	#0xfe,LOCAL_LO+3(%a0)	|clear the l bit
 | |
| add_ext_done:
 | |
| 	rts
 | |
| |
 | |
| |	ADD DOUBLE
 | |
| |
 | |
| add_dbl:
 | |
| 	addl	#ad_1_dbl,LOCAL_LO(%a0)
 | |
| 	bccs	dcc_clr			|no carry out of ls mantissa
 | |
| 	addql	#1,LOCAL_HI(%a0)		|propagate carry
 | |
| 	bccs	dcc_clr
 | |
| 	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
 | |
| 	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
 | |
| 	roxrw	LOCAL_LO(%a0)		|continue the rotate through
 | |
| 	roxrw	LOCAL_LO+2(%a0)		|both words of the ls mantissa
 | |
| 	addw	#0x1,LOCAL_EX(%a0)	|incr exponent
 | |
| dcc_clr:
 | |
| 	tstl	%d0			|test for rs = 0
 | |
| 	bnes	dbl_done
 | |
| 	andiw	#0xf000,LOCAL_LO+2(%a0)	|clear the l-bit
 | |
| 
 | |
| dbl_done:
 | |
| |					;also entered via the trunct table
 | |
| 	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
 | |
| 	rts
 | |
| 
 | |
| | NOTE(review): nothing in the visible code branches to this label;
 | |
| | it simply returns.  Confirm whether it is still needed.
 | |
| error:
 | |
| 	rts
 | |
| |
 | |
| | Truncate all other bits
 | |
| |
 | |
| | Jump table indexed by rounding precision in d1.w.  Callers swap
 | |
| | d1 before branching to truncate.
 | |
| trunct:
 | |
| 	.long	end_rnd			|index 0: nothing to truncate
 | |
| 	.long	sgl_done		|index 1: mask to sgl limit
 | |
| 	.long	dbl_done		|index 2: mask to dbl limit
 | |
| 	.long	dbl_done		|index 3 (same handler as 2)
 | |
| 
 | |
| truncate:
 | |
| 	lea	trunct,%a1
 | |
| 	movel	(%a1,%d1.w*4),%a1	|a1 = handler for prec. in d1.w
 | |
| 	jmp	(%a1)
 | |
| 
 | |
| end_rnd:
 | |
| 	rts
 | |
| 
 | |
| |
 | |
| |	NORMALIZE
 | |
| |
 | |
| | These routines (nrm_zero & nrm_set) normalize the unnorm.  This
 | |
| | is done by shifting the mantissa left while decrementing the
 | |
| | exponent.
 | |
| |
 | |
| | NRM_SET shifts and decrements until there is a 1 set in the integer
 | |
| | bit of the mantissa (msb in d1).
 | |
| |
 | |
| | NRM_ZERO shifts and decrements until there is a 1 set in the integer
 | |
| | bit of the mantissa (msb in d1) unless this would mean the exponent
 | |
| | would go less than 0.  In that case the number becomes a denorm - the
 | |
| | exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
 | |
| | normalized.
 | |
| |
 | |
| | Note that both routines have been optimized (for the worst case) and
 | |
| | therefore do not have the easy to follow decrement/shift loop.
 | |
| |
 | |
| |	NRM_ZERO
 | |
| |
 | |
| |	Distance to first 1 bit in mantissa = X
 | |
| |	Distance to 0 from exponent = Y
 | |
| |	If X < Y
 | |
| |	Then
 | |
| |	  nrm_set
 | |
| |	Else
 | |
| |	  shift mantissa by Y
 | |
| |	  set exponent = 0
 | |
| |
 | |
| |input:
 | |
| |	FP_SCR1 = exponent, ms mantissa part, ls mantissa part
 | |
| |output:
 | |
| |	L_SCR1{4} = fpte15 or ete15 bit
 | |
| |
 | |
| 	.global	nrm_zero
 | |
| nrm_zero:
 | |
| 	movew	LOCAL_EX(%a0),%d0
 | |
| 	cmpw   #64,%d0          |see if exp >= 64
 | |
| 	bmis	d0_less
 | |
| 	bsr	nrm_set		|exp >= 64 so exp won't go past 0
 | |
| 	rts
 | |
| d0_less:
 | |
| 	moveml	%d2/%d3/%d5/%d6,-(%a7)
 | |
| 	movel	LOCAL_HI(%a0),%d1
 | |
| 	movel	LOCAL_LO(%a0),%d2
 | |
| 
 | |
| 	bfffo	%d1{#0:#32},%d3	|get the distance to the first 1
 | |
| |				;in ms mant
 | |
| 	beqs	ms_clr		|branch if no bits were set
 | |
| 	cmpw	%d3,%d0		|if X>Y
 | |
| 	bmis	greater		|then exp will go past 0 (neg) if
 | |
| |				;it is just shifted
 | |
| 	bsr	nrm_set		|else exp won't go past 0
 | |
| 	moveml	(%a7)+,%d2/%d3/%d5/%d6
 | |
| 	rts
 | |
| |
 | |
| | Shift the mantissa left by the exponent (d0) and set exp = 0.
 | |
| | NOTE(review): when entered from ms_clr the count in d0 can be
 | |
| | 33..62, so 32-d0 is negative and both lsll counts exceed 31.
 | |
| | Confirm that case cannot occur or gives the intended result.
 | |
| |
 | |
| greater:
 | |
| 	movel	%d2,%d6		|save ls mant in d6
 | |
| 	lsll	%d0,%d2		|shift ls mant by count
 | |
| 	lsll	%d0,%d1		|shift ms mant by count
 | |
| 	movel	#32,%d5
 | |
| 	subl	%d0,%d5		|make op a denorm by shifting bits
 | |
| 	lsrl	%d5,%d6		|by the number in the exp, then
 | |
| |				;set exp = 0.
 | |
| 	orl	%d6,%d1		|shift the ls mant bits into the ms mant
 | |
| 	movel	#0,%d0		|same as if decremented exp to 0
 | |
| |				;while shifting
 | |
| 	movew	%d0,LOCAL_EX(%a0)
 | |
| 	movel	%d1,LOCAL_HI(%a0)
 | |
| 	movel	%d2,LOCAL_LO(%a0)
 | |
| 	moveml	(%a7)+,%d2/%d3/%d5/%d6
 | |
| 	rts
 | |
| ms_clr:
 | |
| 	bfffo	%d2{#0:#32},%d3	|check if any bits set in ls mant
 | |
| 	beqs	all_clr		|branch if none set
 | |
| 	addw	#32,%d3		|count the 32 zero bits of ms mant
 | |
| 	cmpw	%d3,%d0		|if X>Y
 | |
| 	bmis	greater		|then branch
 | |
| 	bsr	nrm_set		|else exp won't go past 0
 | |
| 	moveml	(%a7)+,%d2/%d3/%d5/%d6
 | |
| 	rts
 | |
| all_clr:
 | |
| 	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
 | |
| 	moveml	(%a7)+,%d2/%d3/%d5/%d6
 | |
| 	rts
 | |
| |
 | |
| |	NRM_SET
 | |
| |
 | |
| 	.global	nrm_set
 | |
| nrm_set:
 | |
| 	movel	%d7,-(%a7)		|save d7 (used for shift count)
 | |
| 	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant (to d7)
 | |
| 	beqs	lower		|branch if ms mant is all 0's
 | |
| 
 | |
| 	movel	%d6,-(%a7)		|save d6 (scratch)
 | |
| 
 | |
| 	subw	%d7,LOCAL_EX(%a0)	|sub exponent by count
 | |
| 	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant
 | |
| 	movel	LOCAL_LO(%a0),%d1 |d1 has ls mant
 | |
| 
 | |
| 	lsll	%d7,%d0		|shift first 1 to j bit position
 | |
| 	movel	%d1,%d6		|copy ls mant into d6
 | |
| 	lsll	%d7,%d6		|shift ls mant by count
 | |
| 	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory
 | |
| 	moveql	#32,%d6
 | |
| 	subl	%d7,%d6		|continue shift
 | |
| 	lsrl	%d6,%d1		|shift off all bits but those that will
 | |
| |				;be shifted into ms mant
 | |
| 	orl	%d1,%d0		|shift the ls mant bits into the ms mant
 | |
| 	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory
 | |
| 	moveml	(%a7)+,%d7/%d6	|restore d6 and d7
 | |
| 	rts
 | |
| 
 | |
| |
 | |
| | We get here if ms mant was = 0, and we assume ls mant has bits
 | |
| | set (otherwise this would have been tagged a zero not a denorm).
 | |
| |
 | |
| lower:
 | |
| 	movew	LOCAL_EX(%a0),%d0	|d0 has exponent
 | |
| 	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant
 | |
| 	subw	#32,%d0		|account for ms mant being all zeros
 | |
| 	bfffo	%d1{#0:#32},%d7	|find first 1 in ls mant (to d7)
 | |
| 	subw	%d7,%d0		|subtract shift count from exp
 | |
| 	lsll	%d7,%d1		|shift first 1 to integer bit in ms mant
 | |
| 	movew	%d0,LOCAL_EX(%a0)	|store exp
 | |
| 	movel	%d1,LOCAL_HI(%a0)	|store ms mant
 | |
| 	clrl	LOCAL_LO(%a0)	|clear ls mant
 | |
| 	movel	(%a7)+,%d7		|restore d7 (d6 not saved on this path)
 | |
| 	rts
 | |
| |
 | |
| |	denorm --- denormalize an intermediate result
 | |
| |
 | |
| |	Used by underflow.
 | |
| |
 | |
| | Input:
 | |
| |	a0	 points to the operand to be denormalized
 | |
| |		 (in the internal extended format)
 | |
| |
 | |
| |	d0:	 rounding precision
 | |
| | Output:
 | |
| |	a0	 points to the denormalized result
 | |
| |		 (in the internal extended format)
 | |
| |
 | |
| |	d0	is guard,round,sticky
 | |
| |
 | |
| | d0 comes into this routine with the rounding precision. It
 | |
| | is then loaded with the denormalized exponent threshold for the
 | |
| | rounding precision.
 | |
| |
 | |
| 
 | |
| 	.global	denorm
 | |
| denorm:
 | |
| 	btstb	#6,LOCAL_EX(%a0)	|check for exponents between $7fff-$4000
 | |
| 	beqs	no_sgn_ext
 | |
| 	bsetb	#7,LOCAL_EX(%a0)	|sign extend if it is so
 | |
| no_sgn_ext:
 | |
| 
 | |
| 	cmpib	#0,%d0		|if 0 then extended precision
 | |
| 	bnes	not_ext		|else branch
 | |
| 
 | |
| 	clrl	%d1		|load d1 with ext threshold
 | |
| 	clrl	%d0		|clear the sticky flag
 | |
| 	bsr	dnrm_lp		|denormalize the number
 | |
| 	tstb	%d1		|check for inex
 | |
| 	beq	no_inex		|if clr, no inex
 | |
| 	bras	dnrm_inex	|if set, set inex
 | |
| 
 | |
| | NOTE(review): precision is tested as a byte above (cmpib) but as
 | |
| | a long here (cmpil) - confirm callers clear the upper bits of d0.
 | |
| not_ext:
 | |
| 	cmpil	#1,%d0		|if 1 then single precision
 | |
| 	beqs	load_sgl	|else must be 2, double prec
 | |
| 
 | |
| load_dbl:
 | |
| 	movew	#dbl_thresh,%d1	|put copy of threshold in d1
 | |
| 	movel	%d1,%d0		|copy d1 into d0
 | |
| 	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
 | |
| 	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
 | |
| 	bpls	chk_stky	|then branch (all bits would be
 | |
| |				; shifted off in denorm routine)
 | |
| 	clrl	%d0		|else clear the sticky flag
 | |
| 	bsr	dnrm_lp		|denormalize the number
 | |
| 	tstb	%d1		|check flag
 | |
| 	beqs	no_inex		|if clr, no inex
 | |
| 	bras	dnrm_inex	|if set, set inex
 | |
| 
 | |
| load_sgl:
 | |
| 	movew	#sgl_thresh,%d1	|put copy of threshold in d1
 | |
| 	movel	%d1,%d0		|copy d1 into d0
 | |
| 	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
 | |
| 	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
 | |
| 	bpls	chk_stky	|then branch (all bits would be
 | |
| |				; shifted off in denorm routine)
 | |
| 	clrl	%d0		|else clear the sticky flag
 | |
| 	bsr	dnrm_lp		|denormalize the number
 | |
| 	tstb	%d1		|check flag
 | |
| 	beqs	no_inex		|if clr, no inex
 | |
| 	bras	dnrm_inex	|if set, set inex
 | |
| 
 | |
| | Every mantissa bit would be shifted out: report sticky if any
 | |
| | bit was set, then return a zero mantissa at the threshold exp.
 | |
| chk_stky:
 | |
| 	tstl	LOCAL_HI(%a0)	|check for any bits set
 | |
| 	bnes	set_stky
 | |
| 	tstl	LOCAL_LO(%a0)	|check for any bits set
 | |
| 	bnes	set_stky
 | |
| 	bras	clr_mant
 | |
| set_stky:
 | |
| 	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
 | |
| 	movel	#0x20000000,%d0	|set sticky bit in return value
 | |
| clr_mant:
 | |
| 	movew	%d1,LOCAL_EX(%a0)		|load exp with threshold
 | |
| 	movel	#0,LOCAL_HI(%a0)	|clear ms mantissa
 | |
| 	movel	#0,LOCAL_LO(%a0)		|clear ls mantissa
 | |
| 	rts
 | |
| dnrm_inex:
 | |
| 	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
 | |
| no_inex:
 | |
| 	rts
 | |
| 
 | |
| |
 | |
| |	dnrm_lp --- normalize exponent/mantissa to specified threshold
 | |
| |
 | |
| | Input:
 | |
| |	a0		points to the operand to be denormalized
 | |
| |	d0{31:29}	initial guard,round,sticky
 | |
| |	d1{15:0}	denormalization threshold
 | |
| | Output:
 | |
| |	a0		points to the denormalized operand
 | |
| |	d0{31:29}	final guard,round,sticky
 | |
| |	d1.b		inexact flag:  all ones means inexact result
 | |
| |
 | |
| | The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
 | |
| | so that bfext can be used to extract the new low part of the mantissa.
 | |
| | Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
 | |
| | is no LOCAL_GRS scratch word following it on the fsave frame.
 | |
| |
 | |
| 	.global	dnrm_lp
 | |
| dnrm_lp:
 | |
| 	movel	%d2,-(%sp)		|save d2 for temp use
 | |
| 	btstb	#E3,E_BYTE(%a6)		|test for type E3 exception
 | |
| 	beqs	not_E3			|not type E3 exception
 | |
| 	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|extract guard,round, sticky  bit
 | |
| 	movel	#29,%d0			|shift count (replaces incoming d0)
 | |
| 	lsll	%d0,%d2			|shift g,r,s to their positions
 | |
| 	movel	%d2,%d0			|d0 = g,r,s taken from WBTEMP_GRS
 | |
| not_E3:
 | |
| 	movel	(%sp)+,%d2		|restore d2
 | |
| 	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)	|copy ls mant to scratch
 | |
| 	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)	|save initial g,r,s in scratch
 | |
| 	movel	%d1,%d0			|copy the denorm threshold
 | |
| 	subw	LOCAL_EX(%a0),%d1		|d1 = threshold - uns exponent
 | |
| 	bles	no_lp			|d1 <= 0
 | |
| 	cmpw	#32,%d1
 | |
| 	blts	case_1			|0 < d1 < 32
 | |
| 	cmpw	#64,%d1
 | |
| 	blts	case_2			|32 <= d1 < 64
 | |
| 	bra	case_3			|d1 >= 64
 | |
| |
 | |
| | No normalization necessary
 | |
| |
 | |
| no_lp:
 | |
| 	clrb	%d1			|set no inex2 reported
 | |
| 	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|restore original g,r,s
 | |
| 	rts
 | |
| |
 | |
| | case (0<d1<32)
 | |
| |
 | |
| case_1:
 | |
| 	movel	%d2,-(%sp)		|save d2 for temp use
 | |
| 	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold
 | |
| 	movel	#32,%d0
 | |
| 	subw	%d1,%d0			|d0 = 32 - d1
 | |
| 	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
 | |
| 	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_HI
 | |
| 	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
 | |
| 	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
 | |
| 	movel	%d2,LOCAL_HI(%a0)		|store new LOCAL_HI
 | |
| 	movel	%d1,LOCAL_LO(%a0)		|store new LOCAL_LO
 | |
| 	clrb	%d1			|assume exact (d1.b = 0)
 | |
| 	bftst	%d0{#2:#30}		|any bits below g,r ?
 | |
| 	beqs	c1nstky
 | |
| 	bsetl	#rnd_stky_bit,%d0	|yes, fold them into sticky
 | |
| 	st	%d1			|and flag inexact (d1.b = $ff)
 | |
| c1nstky:
 | |
| 	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
 | |
| 	andil	#0xe0000000,%d2		|clear all but G,R,S
 | |
| 	tstl	%d2			|test if original G,R,S are clear
 | |
| 	beqs	grs_clear
 | |
| 	orl	#0x20000000,%d0		|set sticky bit in d0
 | |
| grs_clear:
 | |
| 	andil	#0xe0000000,%d0		|clear all but G,R,S
 | |
| 	movel	(%sp)+,%d2		|restore d2
 | |
| 	rts
 | |
| |
 | |
| | case (32<=d1<64)
 | |
| |
 | |
| case_2:
 | |
| 	movel	%d2,-(%sp)		|save d2 for temp use
 | |
| 	movew	%d0,LOCAL_EX(%a0)		|unsigned exponent = threshold
 | |
| 	subw	#32,%d1			|d1 now between 0 and 32
 | |
| 	movel	#32,%d0
 | |
| 	subw	%d1,%d0			|d0 = 32 - d1
 | |
| 	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
 | |
| 	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_LO
 | |
| 	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
 | |
| 	bftst	%d1{#2:#30}		|any bits below g,r ?
 | |
| 	bnes	c2_sstky		|bra if sticky bit to be set
 | |
| 	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}	|any bits in saved ls mant field ?
 | |
| 	bnes	c2_sstky		|bra if sticky bit to be set
 | |
| 	movel	%d1,%d0			|d0 = new G,R,S
 | |
| 	clrb	%d1			|exact (d1.b = 0)
 | |
| 	bras	end_c2
 | |
| c2_sstky:
 | |
| 	movel	%d1,%d0			|d0 = new G,R,S
 | |
| 	bsetl	#rnd_stky_bit,%d0	|with sticky set
 | |
| 	st	%d1			|flag inexact (d1.b = $ff)
 | |
| end_c2:
 | |
| 	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0
 | |
| 	movel	%d2,LOCAL_LO(%a0)		|store LOCAL_LO
 | |
| 	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
 | |
| 	andil	#0xe0000000,%d2		|clear all but G,R,S
 | |
| 	tstl	%d2			|test if original G,R,S are clear
 | |
| 	beqs	clear_grs
 | |
| 	orl	#0x20000000,%d0		|set sticky bit in d0
 | |
| clear_grs:
 | |
| 	andil	#0xe0000000,%d0		|get rid of all but G,R,S
 | |
| 	movel	(%sp)+,%d2		|restore d2
 | |
| 	rts
 | |
| |
 | |
| | d1 >= 64 Force the exponent to be the denorm threshold with the
 | |
| | correct sign.
 | |
| |
 | |
| case_3:
 | |
| 	movew	%d0,LOCAL_EX(%a0)	|exponent = denorm threshold
 | |
| 	tstw	LOCAL_SGN(%a0)		|check the sign of the operand
 | |
| 	bges	c3con			|positive, leave exponent as is
 | |
| c3neg:
 | |
| 	orl	#0x80000000,LOCAL_EX(%a0)	|negative, set msb of the long
 | |
| c3con:
 | |
| 	cmpw	#64,%d1
 | |
| 	beqs	sixty_four
 | |
| 	cmpw	#65,%d1
 | |
| 	beqs	sixty_five
 | |
| |
 | |
| | Shift value is out of range.  Set d1 for inex2 flag and
 | |
| | return a zero with the given threshold.
 | |
| |
 | |
| 	clrl	LOCAL_HI(%a0)
 | |
| 	clrl	LOCAL_LO(%a0)
 | |
| 	movel	#0x20000000,%d0		|return sticky only
 | |
| 	st	%d1			|flag inexact (d1.b = $ff)
 | |
| 	rts
 | |
| 
 | |
| sixty_four:
 | |
| 	movel	LOCAL_HI(%a0),%d0
 | |
| 	bfextu	%d0{#2:#30},%d1		|d1 = bits below the top two
 | |
| 	andil	#0xc0000000,%d0		|top two bits become G,R
 | |
| 	bras	c3com
 | |
| 
 | |
| sixty_five:
 | |
| 	movel	LOCAL_HI(%a0),%d0
 | |
| 	bfextu	%d0{#1:#31},%d1		|d1 = bits below the top one
 | |
| 	andil	#0x80000000,%d0
 | |
| 	lsrl	#1,%d0			|shift high bit into R bit
 | |
| 
 | |
| c3com:
 | |
| 	tstl	%d1			|any of the dropped ms mant bits set ?
 | |
| 	bnes	c3ssticky
 | |
| 	tstl	LOCAL_LO(%a0)		|any ls mant bits set ?
 | |
| 	bnes	c3ssticky
 | |
| 	tstb	FP_SCR2+LOCAL_GRS(%a6)	|any bits in first byte of saved g,r,s ?
 | |
| 	bnes	c3ssticky
 | |
| 	clrb	%d1			|exact (d1.b = 0)
 | |
| 	bras	c3end
 | |
| 
 | |
| c3ssticky:
 | |
| 	bsetl	#rnd_stky_bit,%d0	|set sticky in returned g,r,s
 | |
| 	st	%d1			|flag inexact (d1.b = $ff)
 | |
| c3end:
 | |
| 	clrl	LOCAL_HI(%a0)		|mantissa is shifted out entirely
 | |
| 	clrl	LOCAL_LO(%a0)
 | |
| 	rts
 | |
| 
 | |
| 	|end
 |