Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 | .file "div_Xsig.S" /*---------------------------------------------------------------------------+ | div_Xsig.S | | | | Division subroutine for 96 bit quantities | | | | Copyright (C) 1994,1995 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | | | | +---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------+ | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | | put the 96 bit result at the location d. | | | | The result may not be accurate to 96 bits. It is intended for use where | | a result better than 64 bits is required. The result should usually be | | good to at least 94 bits. | | The returned result is actually divided by one half. This is done to | | prevent overflow. | | | | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | | | | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | | | +---------------------------------------------------------------------------*/ #include "exception.h" #include "fpu_emu.h" #define XsigLL(x) (x) #define XsigL(x) 4(x) #define XsigH(x) 8(x) #ifndef NON_REENTRANT_FPU /* Local storage on the stack: Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 */ #define FPU_accum_3 -4(%ebp) #define FPU_accum_2 -8(%ebp) #define FPU_accum_1 -12(%ebp) #define FPU_accum_0 -16(%ebp) #define FPU_result_3 -20(%ebp) #define FPU_result_2 -24(%ebp) #define FPU_result_1 -28(%ebp) #else .data /* Local storage in a static area: Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 */ .align 4,0 FPU_accum_3: .long 0 FPU_accum_2: .long 0 FPU_accum_1: .long 0 FPU_accum_0: .long 0 FPU_result_3: .long 0 FPU_result_2: .long 0 FPU_result_1: .long 0 #endif NON_REENTRANT_FPU .text ENTRY(div_Xsig) pushl %ebp movl %esp,%ebp #ifndef NON_REENTRANT_FPU subl $28,%esp #endif NON_REENTRANT_FPU pushl %esi pushl %edi pushl %ebx movl PARAM1,%esi /* pointer to num */ movl PARAM2,%ebx /* pointer to denom */ #ifdef PARANOID testl $0x80000000, XsigH(%ebx) /* Divisor */ je L_bugged #endif PARANOID /*---------------------------------------------------------------------------+ | Divide: Return arg1/arg2 to arg3. | | | | The maximum returned value is (ignoring exponents) | | .ffffffff ffffffff | | ------------------ = 1.ffffffff fffffffe | | .80000000 00000000 | | and the minimum is | | .80000000 00000000 | | ------------------ = .80000000 00000001 (rounded) | | .ffffffff ffffffff | | | +---------------------------------------------------------------------------*/ /* Save extended dividend in local register */ /* Divide by 2 to prevent overflow */ clc movl XsigH(%esi),%eax rcrl %eax movl %eax,FPU_accum_3 movl XsigL(%esi),%eax rcrl %eax movl %eax,FPU_accum_2 movl XsigLL(%esi),%eax rcrl %eax movl %eax,FPU_accum_1 movl $0,%eax rcrl %eax movl %eax,FPU_accum_0 movl FPU_accum_2,%eax /* Get the current num */ movl FPU_accum_3,%edx /*----------------------------------------------------------------------*/ /* Initialization done. Do the first 32 bits. */ /* We will divide by a number which is too large */ movl XsigH(%ebx),%ecx addl $1,%ecx jnc LFirst_div_not_1 /* here we need to divide by 100000000h, i.e., no division at all.. */ mov %edx,%eax jmp LFirst_div_done LFirst_div_not_1: divl %ecx /* Divide the numerator by the augmented denom ms dw */ LFirst_div_done: movl %eax,FPU_result_3 /* Put the result in the answer */ mull XsigH(%ebx) /* mul by the ms dw of the denom */ subl %eax,FPU_accum_2 /* Subtract from the num local reg */ sbbl %edx,FPU_accum_3 movl FPU_result_3,%eax /* Get the result back */ mull XsigL(%ebx) /* now mul the ls dw of the denom */ subl %eax,FPU_accum_1 /* Subtract from the num local reg */ sbbl %edx,FPU_accum_2 sbbl $0,FPU_accum_3 je LDo_2nd_32_bits /* Must check for non-zero result here */ #ifdef PARANOID jb L_bugged_1 #endif PARANOID /* need to subtract another once of the denom */ incl FPU_result_3 /* Correct the answer */ movl XsigL(%ebx),%eax movl XsigH(%ebx),%edx subl %eax,FPU_accum_1 /* Subtract from the num local reg */ sbbl %edx,FPU_accum_2 #ifdef PARANOID sbbl $0,FPU_accum_3 jne L_bugged_1 /* Must check for non-zero result here */ #endif PARANOID /*----------------------------------------------------------------------*/ /* Half of the main problem is done, there is just a reduced numerator to handle now. Work with the second 32 bits, FPU_accum_0 not used from now on */ LDo_2nd_32_bits: movl FPU_accum_2,%edx /* get the reduced num */ movl FPU_accum_1,%eax /* need to check for possible subsequent overflow */ cmpl XsigH(%ebx),%edx jb LDo_2nd_div ja LPrevent_2nd_overflow cmpl XsigL(%ebx),%eax jb LDo_2nd_div LPrevent_2nd_overflow: /* The numerator is greater or equal, would cause overflow */ /* prevent overflow */ subl XsigL(%ebx),%eax sbbl XsigH(%ebx),%edx movl %edx,FPU_accum_2 movl %eax,FPU_accum_1 incl FPU_result_3 /* Reflect the subtraction in the answer */ #ifdef PARANOID je L_bugged_2 /* Can't bump the result to 1.0 */ #endif PARANOID LDo_2nd_div: cmpl $0,%ecx /* augmented denom msw */ jnz LSecond_div_not_1 /* %ecx == 0, we are dividing by 1.0 */ mov %edx,%eax jmp LSecond_div_done LSecond_div_not_1: divl %ecx /* Divide the numerator by the denom ms dw */ LSecond_div_done: movl %eax,FPU_result_2 /* Put the result in the answer */ mull XsigH(%ebx) /* mul by the ms dw of the denom */ subl %eax,FPU_accum_1 /* Subtract from the num local reg */ sbbl %edx,FPU_accum_2 #ifdef PARANOID jc L_bugged_2 #endif PARANOID movl FPU_result_2,%eax /* Get the result back */ mull XsigL(%ebx) /* now mul the ls dw of the denom */ subl %eax,FPU_accum_0 /* Subtract from the num local reg */ sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ sbbl $0,FPU_accum_2 #ifdef PARANOID jc L_bugged_2 #endif PARANOID jz LDo_3rd_32_bits #ifdef PARANOID cmpl $1,FPU_accum_2 jne L_bugged_2 #endif PARANOID /* need to subtract another once of the denom */ movl XsigL(%ebx),%eax movl XsigH(%ebx),%edx subl %eax,FPU_accum_0 /* Subtract from the num local reg */ sbbl %edx,FPU_accum_1 sbbl $0,FPU_accum_2 #ifdef PARANOID jc L_bugged_2 jne L_bugged_2 #endif PARANOID addl $1,FPU_result_2 /* Correct the answer */ adcl $0,FPU_result_3 #ifdef PARANOID jc L_bugged_2 /* Must check for non-zero result here */ #endif PARANOID /*----------------------------------------------------------------------*/ /* The division is essentially finished here, we just need to perform tidying operations. Deal with the 3rd 32 bits */ LDo_3rd_32_bits: /* We use an approximation for the third 32 bits. To take account of the 3rd 32 bits of the divisor (call them del), we subtract del * (a/b) */ movl FPU_result_3,%eax /* a/b */ mull XsigLL(%ebx) /* del */ subl %edx,FPU_accum_1 /* A borrow indicates that the result is negative */ jnb LTest_over movl XsigH(%ebx),%edx addl %edx,FPU_accum_1 subl $1,FPU_result_2 /* Adjust the answer */ sbbl $0,FPU_result_3 /* The above addition might not have been enough, check again. */ movl FPU_accum_1,%edx /* get the reduced num */ cmpl XsigH(%ebx),%edx /* denom */ jb LDo_3rd_div movl XsigH(%ebx),%edx addl %edx,FPU_accum_1 subl $1,FPU_result_2 /* Adjust the answer */ sbbl $0,FPU_result_3 jmp LDo_3rd_div LTest_over: movl FPU_accum_1,%edx /* get the reduced num */ /* need to check for possible subsequent overflow */ cmpl XsigH(%ebx),%edx /* denom */ jb LDo_3rd_div /* prevent overflow */ subl XsigH(%ebx),%edx movl %edx,FPU_accum_1 addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ adcl $0,FPU_result_3 LDo_3rd_div: movl FPU_accum_0,%eax movl FPU_accum_1,%edx divl XsigH(%ebx) movl %eax,FPU_result_1 /* Rough estimate of third word */ movl PARAM3,%esi /* pointer to answer */ movl FPU_result_1,%eax movl %eax,XsigLL(%esi) movl FPU_result_2,%eax movl %eax,XsigL(%esi) movl FPU_result_3,%eax movl %eax,XsigH(%esi) L_exit: popl %ebx popl %edi popl %esi leave ret #ifdef PARANOID /* The logic is wrong if we got here */ L_bugged: pushl EX_INTERNAL|0x240 call EXCEPTION pop %ebx jmp L_exit L_bugged_1: pushl EX_INTERNAL|0x241 call EXCEPTION pop %ebx jmp L_exit L_bugged_2: pushl EX_INTERNAL|0x242 call EXCEPTION pop %ebx jmp L_exit #endif PARANOID |