Lines Matching +full:x +full:- +full:z
8 | Input: Double-extended value in memory location pointed to by address
11 | Output: Arctan(X) returned in floating-point register Fp0.
19 | argument X such that 1/16 <= |X| < 16. For the other arguments,
23 | Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
25 | Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
27 | of X with a bit-1 attached at the 6-th bit position. Define u
28 | to be u = (X-F) / (1 + X*F).
35 | Step 5. If |X| >= 16, go to Step 7.
37 | Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
39 | Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
40 | Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
217 .set X,FP_SCR1 define
218 .set XDCARE,X+2
219 .set XFRAC,X+4
220 .set XFRACLO,X+8
232 |--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
238 |--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
244 fmovex %fp0,X(%a6)
247 cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16?
252 cmpil #0x4002FFFF,%d0 | ...|X| < 16 ?
257 |--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
258 |--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
259 |--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
260 |--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
261 |--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
262 |--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
263 |--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
264 |--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
265 |--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
266 |--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
267 |--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
268 |--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
269 |--WILL INVOLVE A VERY LONG POLYNOMIAL.
271 |--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
272 |--WE CHOOSE F TO BE +-2^K * 1.BBBB1
273 |--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
274 |--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
275 |--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
276 |-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
280 movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE
282 oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1
283 movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F
285 fmovex %fp0,%fp1 | ...FP1 IS X
286 fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0
287 fsubx X(%a6),%fp0 | ...FP0 IS X-F
288 fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F
289 fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F)
291 |--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
292 |--CREATE ATAN(F) AND STORE IT IN ATANF, AND
293 |--SAVE REGISTERS FP2.
295 movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY
296 movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X
308 movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN
313 |--THAT'S ALL I HAVE TO DO FOR NOW,
314 |--BUT ALAS, THE DIVIDE IS STILL CRANKING!
316 |--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
317 |--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
318 |--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
319 |--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
320 |--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
321 |--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
322 |--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
337 faddx ATANF(%a6),%fp0 | ...ATAN(X)
341 |--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
342 |--FP0 IS X AND |X| < 1/16 OR |X| >= 16.
344 bgt ATANBIG | ...I.E. |X| >= 16
347 |--|X| < 1/16
348 |--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
349 |--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
350 |--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
351 |--WHERE Y = X*X, AND Z = Y*Y.
355 |--COMPUTE POLYNOMIAL
356 fmulx %fp0,%fp0 | ...FP0 IS Y = X*X
362 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y
367 fmulx %fp1,%fp2 | ...Z*B6
368 fmulx %fp1,%fp3 | ...Z*B5
370 faddd ATANB4,%fp2 | ...B4+Z*B6
371 faddd ATANB3,%fp3 | ...B3+Z*B5
373 fmulx %fp1,%fp2 | ...Z*(B4+Z*B6)
374 fmulx %fp3,%fp1 | ...Z*(B3+Z*B5)
376 faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6)
377 faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5)
379 fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6))
380 fmulx X(%a6),%fp0 | ...X*Y
382 faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
385 fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
388 faddx X(%a6),%fp0
393 |--|X| < 2^(-40), ATAN(X) = X
397 fmovex X(%a6),%fp0 |last inst - possible exception set
402 |--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
403 |--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
407 |--APPROXIMATE ATAN(-1/X) BY
408 |--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
409 |--THIS CAN BE RE-WRITTEN AS
410 |--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
412 fmoves #0xBF800000,%fp1 | ...LOAD -1
413 fdivx %fp0,%fp1 | ...FP1 IS -1/X
416 |--DIVIDE IS STILL CRANKING
418 fmovex %fp1,%fp0 | ...FP0 IS X'
419 fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X'
420 fmovex %fp1,X(%a6) | ...X IS REALLY X'
423 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y
428 fmulx %fp1,%fp3 | ...Z*C5
429 fmulx %fp1,%fp2 | ...Z*C4
431 faddd ATANC3,%fp3 | ...C3+Z*C5
432 faddd ATANC2,%fp2 | ...C2+Z*C4
434 fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED
435 fmulx %fp0,%fp2 | ...Y*(C2+Z*C4)
437 faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5)
438 fmulx X(%a6),%fp0 | ...X'*Y
440 faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
443 fmulx %fp1,%fp0 | ...X'*Y*([C1+Z*(C3+Z*C5)]
444 | ... +[Y*(C2+Z*C4)])
445 faddx X(%a6),%fp0
461 |--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY