/* SPDX-License-Identifier: GPL-2.0 */
	.file	"wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995,1997                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
 | Call from C as:                                                           |
 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range [1.0 .. 4.0), to 64 bits accuracy.                       |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/

/*
 * Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86,
 * passed through the C preprocessor (PARAM1, SIGH, SIGL, EXP, EXP_BIAS,
 * EX_INTERNAL and EXCEPTION come from the included headers).
 *
 * Register roles inside wm_sqrt:
 *   %esi:%edi	current estimate of the root (ms:ls word)
 *   %eax	after stage 3: the word of extra precision below %edi, used
 *		as rounding information
 *
 * The routine does not execute a "ret" itself.  It finishes with
 * "jmp fpu_reg_round", handing over:
 *   %eax = ms word of the result     %ebx = ls word of the result
 *   %edx = rounding bits             %edi = pointer to n (PARAM1)
 * NOTE(review): the frame set up here (%ebp, locals, saved %esi/%edi/%ebx)
 * is presumably unwound by fpu_reg_round -- confirm in its source.
 */

#include "exception.h"
#include "fpu_emu.h"


#ifndef NON_REENTRANT_FPU
/*	Local storage on the stack: */
#define FPU_accum_3	-4(%ebp)	/* ms word */
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */

#else
/*	Local storage in a static area: */
.data
	.align 4,0
FPU_accum_3:
	.long	0		/* ms word */
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
FPU_fsqrt_arg_2:
	.long	0		/* ms word */
FPU_fsqrt_arg_1:
	.long	0
FPU_fsqrt_arg_0:
	.long	0		/* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */


.text
SYM_FUNC_START(wm_sqrt)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp		/* room for FPU_accum_* and FPU_fsqrt_arg_* */
#endif /* NON_REENTRANT_FPU */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi

	movl	SIGH(%esi),%eax
	movl	SIGL(%esi),%ecx
	xorl	%edx,%edx

/* We use a rough linear estimate for the first guess.. */

	cmpw	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
	rcrl	$1,%ecx
	rcrl	$1,%edx			/* the shifted-out bit lands in the ms bit */

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
	movl	%ecx,FPU_fsqrt_arg_1
	movl	%edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
	shrl	$1,%eax
	addl	$0x40000000,%eax
	movl	$0xaaaaaaaa,%ecx
	mull	%ecx
	shll	%edx			/* max result was 7fff... */
	testl	$0x80000000,%edx	/* but min was 3fff... */
	jnz	sqrt_prelim_no_adjust

	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi	/* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
	shrl	%ecx		/* Doing this first will prevent a divide */
				/* overflow later. */

	/* Iteration 1.  %eax still holds the low word of the product above. */
	movl	%ecx,%edx	/* msw of the arg / 2 */
	divl	%esi		/* current estimate */
	shrl	%esi		/* divide by 2 */
	addl	%eax,%esi	/* the new estimate */

	/* Iteration 2.  %eax carries over the previous quotient as low word. */
	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

	/* Iteration 3. */
	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */

/* First, find the square of the guess */
	movl	%esi,%eax
	mull	%esi
/* guess^2 now in %edx:%eax */

	movl	FPU_fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
	sbbl	%ecx,%edx
	jnc	sqrt_stage_2_positive

/* Subtraction gives a negative result,
   negate the result before division. */
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	divl	%esi
	movl	%eax,%ecx		/* ms word of the quotient */

	movl	%edx,%eax		/* remainder becomes the next dividend word */
	divl	%esi
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	divl	%esi
	movl	%eax,%ecx		/* ms word of the quotient */

	movl	%edx,%eax		/* remainder becomes the next dividend word */
	divl	%esi

	/* guess^2 > n here, so the correction %ecx:%eax must be negated */
	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* Form the new estimate in %esi:%edi */
	movl	%eax,%edi
	addl	%ecx,%esi

	jnz	sqrt_stage_2_done	/* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
	cmpl	$0xffffffff,FPU_fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif /* PARANOID */

/* The best rounded result. */
	xorl	%eax,%eax
	decl	%eax			/* %eax = 0xffffffff */
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax	/* rounding bits for fpu_reg_round */
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
#endif /* PARANOID */

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/* Find the square of the guess. */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,FPU_accum_1

	movl	%esi,%eax
	mull	%esi
	movl	%edx,FPU_accum_3
	movl	%eax,FPU_accum_2

	movl	%edi,%eax
	mull	%esi
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

	/*
	 * The cross product is needed twice.  %edx:%eax still hold it, so it
	 * is simply added again instead of being recomputed:
	 */
/*	movl	%esi,%eax */
/*	mull	%edi */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
	subl	%eax,FPU_accum_1
	movl	FPU_fsqrt_arg_1,%eax
	sbbl	%eax,FPU_accum_2
	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
	sbbl	%eax,FPU_accum_3
	jnc	sqrt_stage_3_positive

/* Subtraction gives a negative result,
   negate the result before division */
	notl	FPU_accum_1
	notl	FPU_accum_2
	notl	FPU_accum_3
	addl	$1,FPU_accum_1
	adcl	$0,FPU_accum_2

#ifdef PARANOID
	adcl	$0,FPU_accum_3	/* This must be zero */
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx		/* ms word of the quotient */

	movl	%edx,%eax		/* remainder becomes the next dividend word */
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	addl	%ecx,%edi
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx		/* ms word of the quotient */

	movl	%edx,%eax		/* remainder becomes the next dividend word */
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	notl	%eax		/* Negate the correction term */
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx		/* carry here ==> correction == 0 */
	adcl	$0xffffffff,%esi

	addl	%ecx,%edi
	adcl	$0,%esi

sqrt_stage_3_finished:

/*
 * The result in %esi:%edi:%eax should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 */
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision

sqrt_round_result:
/* Set up for rounding operations */
	movl	%eax,%edx		/* rounding bits */
	movl	%esi,%eax		/* ms word of the result */
	movl	%edi,%ebx		/* ls word of the result */
	movl	PARAM1,%edi
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
	jmp	fpu_reg_round


sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* add the cross product twice */
	addl	%eax,%ebx

#ifdef PARANOID
	/*
	 * NOTE(review): as written, every value of %ebx is either below
	 * 0xffffffb0 or above 0x00000050, so both branches lead to
	 * sqrt_near_exact_ok and the EXCEPTION call looks unreachable.
	 * Confirm the intended bounds before changing the conditions.
	 */
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok
	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_near_exact_small

	jnz	sqrt_near_exact_large

	/*
	 * The 2nd ls word is zero, so the ls word of the square decides.
	 * That word was saved in %ecx; %edx now holds the high word of the
	 * %edi * %esi cross product, which belongs to a higher word of the
	 * square and says nothing about exactness.
	 */
	or	%ebx,%ecx
	jnz	sqrt_near_exact_large

/* Our estimate is exactly the right answer */
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result

sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
	stc			/* The extra bit. */
	rcll	$1,%edi		/* Shift the estimate left one bit */
	rcll	$1,%esi

	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* add the cross product twice */
	addl	%eax,%ebx

/* Put our estimate back to its original value */
	stc			/* The ms bit. */
	rcrl	$1,%esi		/* Shift the estimate right one bit */
	rcrl	$1,%edi

#ifdef PARANOID
	/*
	 * NOTE(review): as written, every value of %ebx is either below
	 * 0xffffff60 or above 0x000000a0, so both branches lead to
	 * sqrt_more_prec_ok and the EXCEPTION call looks unreachable.
	 * Confirm the intended bounds before changing the conditions.
	 */
	cmp	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok
	cmp	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_more_prec_small

	jnz	sqrt_more_prec_large

	or	%ebx,%ecx		/* 2nd ls word is zero: test the ls word */
	jnz	sqrt_more_prec_large

/* Our estimate is exactly the right answer */
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result

sqrt_more_prec_large:
/* Our estimate is too large */
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
SYM_FUNC_END(wm_sqrt)