/* $Id: mul.S,v 1.4 1996/09/30 02:22:32 davem Exp $
 * mul.S:	This routine was taken from glibc-1.09 and is covered
 *		by the GNU Library General Public License Version 2.
 */

/*
 * Signed multiply, from Appendix E of the Sparc Version 8
 * Architecture Manual.
 */

/*
 * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
 * the 64-bit product).
 *
 * This code optimizes short (less than 13-bit) multiplies.
 */

	.globl .mul
.mul:
	mov	%o0, %y		! multiplier -> Y
	andncc	%o0, 0xfff, %g0	! test bits 12..31
	be	Lmul_shortway	! if zero, can do it the short way
	 andcc	%g0, %g0, %o4	! zero the partial product and clear N and V

	/*
	 * Long multiply.  32 steps, followed by a final shift step.
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %o1, %o4	! 13
	mulscc	%o4, %o1, %o4	! 14
	mulscc	%o4, %o1, %o4	! 15
	mulscc	%o4, %o1, %o4	! 16
	mulscc	%o4, %o1, %o4	! 17
	mulscc	%o4, %o1, %o4	! 18
	mulscc	%o4, %o1, %o4	! 19
	mulscc	%o4, %o1, %o4	! 20
	mulscc	%o4, %o1, %o4	! 21
	mulscc	%o4, %o1, %o4	! 22
	mulscc	%o4, %o1, %o4	! 23
	mulscc	%o4, %o1, %o4	! 24
	mulscc	%o4, %o1, %o4	! 25
	mulscc	%o4, %o1, %o4	! 26
	mulscc	%o4, %o1, %o4	! 27
	mulscc	%o4, %o1, %o4	! 28
	mulscc	%o4, %o1, %o4	! 29
	mulscc	%o4, %o1, %o4	! 30
	mulscc	%o4, %o1, %o4	! 31
	mulscc	%o4, %o1, %o4	! 32
	mulscc	%o4, %g0, %o4	! final shift

	! If %o0 was negative, the result is
	!	(%o0 * %o1) + (%o1 << 32)
	! We fix that here.

#if 0
	tst	%o0
	bge	1f
	 rd	%y, %o0

	! %o0 was indeed negative; fix upper 32 bits of result by subtracting
	! %o1 (i.e., return %o4 - %o1 in %o1).
	retl
	 sub	%o4, %o1, %o1

1:
	retl
	 mov	%o4, %o1
#else
	/* Faster code adapted from tege@sics.se's code for umul.S.  */
	sra	%o0, 31, %o2	! make mask from sign bit
	and	%o1, %o2, %o2	! %o2 = 0 or %o1, depending on sign of %o0
	rd	%y, %o0		! get lower half of product
	retl
	 sub	%o4, %o2, %o1	! subtract compensation
				!  and put upper half in place
#endif
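
	/*
	 * Why the mask trick above suffices: mulscc consumes the
	 * multiplier in %y as an unsigned quantity, so when %o0 is
	 * negative the raw upper half left in %o4 is too large by
	 * exactly %o1.  A C model of the fixup (an explanatory sketch
	 * only; the function name is made up and nothing below is
	 * assembled):
	 *
	 *	#include <stdint.h>
	 *
	 *	static int32_t fix_upper(int32_t a, int32_t b, int32_t raw_hi)
	 *	{
	 *		int32_t mask = a >> 31;   // like sra: 0, or ~0 if a < 0
	 *		int32_t comp = b & mask;  // 0, or b
	 *		return raw_hi - comp;     // corrected upper 32 bits
	 *	}
	 *
	 * (The sketch assumes a >> 31 is an arithmetic shift, which is
	 * what sra guarantees; C leaves this implementation-defined.)
	 */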

Lmul_shortway:
	/*
	 * Short multiply.  12 steps, followed by a final shift step.
	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
	 * but there is no problem with %o0 being negative (unlike above).
	 */
	mulscc	%o4, %o1, %o4	! 1
	mulscc	%o4, %o1, %o4	! 2
	mulscc	%o4, %o1, %o4	! 3
	mulscc	%o4, %o1, %o4	! 4
	mulscc	%o4, %o1, %o4	! 5
	mulscc	%o4, %o1, %o4	! 6
	mulscc	%o4, %o1, %o4	! 7
	mulscc	%o4, %o1, %o4	! 8
	mulscc	%o4, %o1, %o4	! 9
	mulscc	%o4, %o1, %o4	! 10
	mulscc	%o4, %o1, %o4	! 11
	mulscc	%o4, %o1, %o4	! 12
	mulscc	%o4, %g0, %o4	! final shift

	/*
	 * %o4 has 20 of the bits that should be in the low part of the
	 * result; %y has the bottom 12 (as %y's top 12).  That is:
	 *
	 *	  %o4		    %y
	 * +----------------+----------------+
	 * | -12- |   -20-  | -12- |   -20-  |
	 * +------(---------+------)---------+
	 *  --hi-- ----low-part----
	 *
	 * The upper 12 bits of %o4 should be sign-extended to form the
	 * high part of the product (i.e., highpart = %o4 >> 20).
	 */
	rd	%y, %o5
	sll	%o4, 12, %o0	! shift middle bits left 12
	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
	or	%o5, %o0, %o0	! construct low part of result
	retl
	 sra	%o4, 20, %o1	! ... and extract high part of result

	! Replacement body for CPUs that implement the V8 hardware
	! multiply: the kernel patches these instructions over the start
	! of .mul at boot.  smul leaves the low 32 bits of the product
	! in %o0 and the high 32 bits in %y, which rd moves into %o1.
	.globl	.mul_patch
.mul_patch:
	smul	%o0, %o1, %o0
	retl
	 rd	%y, %o1

	nop
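
	/*
	 * Overall contract of .mul, modeled in C (a reference sketch
	 * only; the function name is hypothetical and this is not part
	 * of the build):
	 *
	 *	#include <stdint.h>
	 *
	 *	// a and b arrive in %o0 and %o1; on return %o0 holds the
	 *	// low 32 bits and %o1 the high 32 bits of the product.
	 *	static int64_t dot_mul(int32_t a, int32_t b)
	 *	{
	 *		return (int64_t)a * (int64_t)b;
	 *	}
	 */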