/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	lvx	v0, 0, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
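	 * Only overwrite VRSAVE below when it is still zero, so a value that
	 * userspace has already established is preserved.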
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	lvx	v0,0,r7
	blr

#ifdef CONFIG_VSX
#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
	/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr
#endif /* CONFIG_VSX */

/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
	.data
#ifdef CONFIG_PPC32
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

fpzero:
	.quad	0
fpone:
	.quad	0x3ff0000000000000	/* 1.0 */
fphalf:
	.quad	0x3fe0000000000000	/* 0.5 */

#ifdef CONFIG_PPC_KERNEL_PCREL
#define LDCONST(fr, name)		\
	pla	r11,name@pcrel;		\
	lfd	fr,0(r11)
#else
#define LDCONST(fr, name)		\
	addis	r11,r2,name@toc@ha;	\
	lfd	fr,name@toc@l(r11)
#endif
#endif
	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
SYM_FUNC_START_LOCAL(fpenable)
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr
SYM_FUNC_END(fpenable)

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
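 * r3 -> destination, r4 -> A, r5 -> B, r6 -> C; stores A*C + B for each
 * of the four single-precision elements.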
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
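
/*
 * Note on the Newton-Raphson step used in vrsqrtefp above: for
 * f(r) = 1/r^2 - s, the update r' = r - f(r)/f'(r) simplifies to
 * r' = r + 0.5 * r * (1 - s * r * r), which is what each
 * fmuls/fnmsubs/fmadds group in the loop computes.
 */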