Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | /*---------------------------------------------------------------------------+ | polynomial_Xsig.S | | | | Fixed point arithmetic polynomial evaluation. | | | | Copyright (C) 1992,1993,1994,1995 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | | | Call from C as: | | void polynomial_Xsig(Xsig *accum, unsigned long long x, | | unsigned long long terms[], int n) | | | | Computes: | | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | | and adds the result to the 12 byte Xsig. | | The terms[] are each 8 bytes, but all computation is performed to 12 byte | | precision. | | | | This function must be used carefully: most overflow of intermediate | | results is controlled, but overflow of the result is not. | | | +---------------------------------------------------------------------------*/ .file "polynomial_Xsig.S" #include "fpu_emu.h" #define TERM_SIZE $8 #define SUM_MS -20(%ebp) /* sum ms long */ #define SUM_MIDDLE -24(%ebp) /* sum middle long */ #define SUM_LS -28(%ebp) /* sum ls long */ #define ACCUM_MS -4(%ebp) /* accum ms long */ #define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ #define ACCUM_LS -12(%ebp) /* accum ls long */ #define OVERFLOWED -16(%ebp) /* addition overflow flag */ .text ENTRY(polynomial_Xsig) pushl %ebp movl %esp,%ebp subl $32,%esp pushl %esi pushl %edi pushl %ebx movl PARAM2,%esi /* x */ movl PARAM3,%edi /* terms */ movl TERM_SIZE,%eax mull PARAM4 /* n */ addl %eax,%edi movl 4(%edi),%edx /* terms[n] */ movl %edx,SUM_MS movl (%edi),%edx /* terms[n] */ movl %edx,SUM_MIDDLE xor %eax,%eax movl %eax,SUM_LS movb %al,OVERFLOWED subl TERM_SIZE,%edi decl PARAM4 js L_accum_done L_accum_loop: xor %eax,%eax movl %eax,ACCUM_MS movl %eax,ACCUM_MIDDLE movl SUM_MIDDLE,%eax mull (%esi) /* x ls long */ movl %edx,ACCUM_LS movl SUM_MIDDLE,%eax mull 4(%esi) /* x ms long */ addl %eax,ACCUM_LS adcl %edx,ACCUM_MIDDLE adcl $0,ACCUM_MS movl SUM_MS,%eax mull (%esi) /* x ls long */ addl %eax,ACCUM_LS adcl %edx,ACCUM_MIDDLE adcl $0,ACCUM_MS movl SUM_MS,%eax mull 4(%esi) /* x ms long */ addl %eax,ACCUM_MIDDLE adcl %edx,ACCUM_MS testb $0xff,OVERFLOWED jz L_no_overflow movl (%esi),%eax addl %eax,ACCUM_MIDDLE movl 4(%esi),%eax adcl %eax,ACCUM_MS /* This could overflow too */ L_no_overflow: /* * Now put the sum of next term and the accumulator * into the sum register */ movl ACCUM_LS,%eax addl (%edi),%eax /* term ls long */ movl %eax,SUM_LS movl ACCUM_MIDDLE,%eax adcl (%edi),%eax /* term ls long */ movl %eax,SUM_MIDDLE movl ACCUM_MS,%eax adcl 4(%edi),%eax /* term ms long */ movl %eax,SUM_MS sbbb %al,%al movb %al,OVERFLOWED /* Used in the next iteration */ subl TERM_SIZE,%edi decl PARAM4 jns L_accum_loop L_accum_done: movl PARAM1,%edi /* accum */ movl SUM_LS,%eax addl %eax,(%edi) movl SUM_MIDDLE,%eax adcl %eax,4(%edi) movl SUM_MS,%eax adcl %eax,8(%edi) popl %ebx popl %edi popl %esi leave ret |