1/*---------------------------------------------------------------------------+ 2 | polynomial_Xsig.S | 3 | | 4 | Fixed point arithmetic polynomial evaluation. | 5 | | 6 | Copyright (C) 1992,1993,1994,1995 | 7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 8 | Australia. E-mail billm@jacobi.maths.monash.edu.au | 9 | | 10 | Call from C as: | 11 | void polynomial_Xsig(Xsig *accum, unsigned long long x, | 12 | unsigned long long terms[], int n) | 13 | | 14 | Computes: | 15 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | 16 | and adds the result to the 12 byte Xsig. | 17 | The terms[] are each 8 bytes, but all computation is performed to 12 byte | 18 | precision. | 19 | | 20 | This function must be used carefully: most overflow of intermediate | 21 | results is controlled, but overflow of the result is not. | 22 | | 23 +---------------------------------------------------------------------------*/ 24 .file "polynomial_Xsig.S" 25 26#include "fpu_emu.h" 27 28 29#define TERM_SIZE $8 30#define SUM_MS -20(%ebp) /* sum ms long */ 31#define SUM_MIDDLE -24(%ebp) /* sum middle long */ 32#define SUM_LS -28(%ebp) /* sum ls long */ 33#define ACCUM_MS -4(%ebp) /* accum ms long */ 34#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ 35#define ACCUM_LS -12(%ebp) /* accum ls long */ 36#define OVERFLOWED -16(%ebp) /* addition overflow flag */ 37 38.text 39ENTRY(polynomial_Xsig) 40 pushl %ebp 41 movl %esp,%ebp 42 subl $32,%esp 43 pushl %esi 44 pushl %edi 45 pushl %ebx 46 47 movl PARAM2,%esi /* x */ 48 movl PARAM3,%edi /* terms */ 49 50 movl TERM_SIZE,%eax 51 mull PARAM4 /* n */ 52 addl %eax,%edi 53 54 movl 4(%edi),%edx /* terms[n] */ 55 movl %edx,SUM_MS 56 movl (%edi),%edx /* terms[n] */ 57 movl %edx,SUM_MIDDLE 58 xor %eax,%eax 59 movl %eax,SUM_LS 60 movb %al,OVERFLOWED 61 62 subl TERM_SIZE,%edi 63 decl PARAM4 64 js L_accum_done 65 66L_accum_loop: 67 xor %eax,%eax 68 movl %eax,ACCUM_MS 69 movl %eax,ACCUM_MIDDLE 70 71 movl SUM_MIDDLE,%eax 72 mull (%esi) /* x ls long */ 73 movl %edx,ACCUM_LS 74 75 movl SUM_MIDDLE,%eax 76 mull 4(%esi) /* x ms long */ 77 addl %eax,ACCUM_LS 78 adcl %edx,ACCUM_MIDDLE 79 adcl $0,ACCUM_MS 80 81 movl SUM_MS,%eax 82 mull (%esi) /* x ls long */ 83 addl %eax,ACCUM_LS 84 adcl %edx,ACCUM_MIDDLE 85 adcl $0,ACCUM_MS 86 87 movl SUM_MS,%eax 88 mull 4(%esi) /* x ms long */ 89 addl %eax,ACCUM_MIDDLE 90 adcl %edx,ACCUM_MS 91 92 testb $0xff,OVERFLOWED 93 jz L_no_overflow 94 95 movl (%esi),%eax 96 addl %eax,ACCUM_MIDDLE 97 movl 4(%esi),%eax 98 adcl %eax,ACCUM_MS /* This could overflow too */ 99 100L_no_overflow: 101 102/* 103 * Now put the sum of next term and the accumulator 104 * into the sum register 105 */ 106 movl ACCUM_LS,%eax 107 addl (%edi),%eax /* term ls long */ 108 movl %eax,SUM_LS 109 movl ACCUM_MIDDLE,%eax 110 adcl (%edi),%eax /* term ls long */ 111 movl %eax,SUM_MIDDLE 112 movl ACCUM_MS,%eax 113 adcl 4(%edi),%eax /* term ms long */ 114 movl %eax,SUM_MS 115 sbbb %al,%al 116 movb %al,OVERFLOWED /* Used in the next iteration */ 117 118 subl TERM_SIZE,%edi 119 decl PARAM4 120 jns L_accum_loop 121 122L_accum_done: 123 movl PARAM1,%edi /* accum */ 124 movl SUM_LS,%eax 125 addl %eax,(%edi) 126 movl SUM_MIDDLE,%eax 127 adcl %eax,4(%edi) 128 movl SUM_MS,%eax 129 adcl %eax,8(%edi) 130 131 popl %ebx 132 popl %edi 133 popl %esi 134 leave 135 ret 136