/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.  On exit the quotient is
 * in "result"; "dividend", "divisor" and "curbit" are all overwritten.
 *
 * Every expansion in this file is reached only after the caller has
 * branched away for a zero divisor, a divisor of 1, a power-of-two
 * divisor, and dividend <= divisor.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend; curbit marks the quotient bit for that position.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per iteration, one
	@ for each of curbit, curbit >> 1, curbit >> 2 and curbit >> 3.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Sets "order" to the shift count equivalent to dividing by "divisor".
 * Both expansions in this file are reached only when the divisor has a
 * single bit set (tst divisor, divisor - 1 gave zero).  On ARMv5 and
 * later this is 31 - clz(divisor) and "divisor" is preserved; the
 * fallback narrows the value with 16/8/4-bit steps and overwrites
 * "divisor" while doing so.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now below 16: a value of 8 adds 3, while 1, 2 and 4
	@ add divisor >> 1, that is 0, 1 and 2.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core.  On exit the remainder is in "dividend";
 * "divisor" and "order" are overwritten.  "spare" is written only in
 * the ARMv5 and later variant.
 *
 * Both expansions in this file are reached only when the dividend is
 * strictly greater than the divisor and the divisor is neither 0, 1
 * nor a power of two.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = number of bit positions the divisor is shifted up by.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	@ Finished if no partial batch remains (low two bits of order
	@ are clear) or if the dividend has already reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is -3 for one step, -2 for two, -1 for three.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3 and the flags as scratch
 *
 * A zero divisor branches to Ldiv0.
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor is 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ Flags still come from cmp r0, r1: equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder
 * Uses: r1, r2, r3 and the flags as scratch
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * returns early with 0 when the divisor is 1 or equals the dividend,
 * with the dividend unchanged when it is below the divisor, and with
 * dividend & (divisor - 1) when the divisor is a power of two.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3, ip and the flags as scratch
 *
 * The division is carried out on magnitudes; ip = r0 ^ r1 keeps the
 * sign of the result in its top bit.  A zero divisor branches to
 * Ldiv0.
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align 3
#endif

ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Divisor was 1 or -1: negate the dividend when ip and r0 differ
	@ in sign, which is the case exactly when the divisor was negative.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from cmp r3, r1.  Lower magnitude gives 0;
	@ equal magnitude gives +1 or -1 according to the sign kept in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, negated when the dividend was negative
 * Uses: r1, r2, r3, ip and the flags as scratch
 *
 * The remainder is computed on magnitudes; ip holds the original
 * dividend so that its sign can be applied at label 10.  A zero
 * divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 as scratch; ip and lr are saved and restored
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then
 * rebuilt as dividend - quotient * divisor from the saved operands.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 as scratch; ip and lr are saved and restored
 *
 * The quotient comes from __aeabi_idiv; the remainder is then
 * rebuilt as dividend - quotient * divisor from the saved operands.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common divide-by-zero path, reached by a branch (not a call)
 * from the entry points above, so lr still holds the return address of
 * their caller.  Saves lr in an 8-byte stack slot, calls __div0, then
 * returns 0 in r0 directly to that caller.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(Ldiv0)