/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab

	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]
	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)
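
	/*
	 * Rough sketch of the C-level calling convention, inferred from the
	 * register aliases above (rk in r0, rounds in r1, in/out in r2/r3);
	 * the authoritative prototypes are the asmlinkage declarations in the
	 * accompanying C glue code and may differ in detail:
	 *
	 *	void __aes_arm_encrypt(u32 rk[], int rounds,
	 *			       const u8 *in, u8 *out);
	 *	void __aes_arm_decrypt(u32 rk[], int rounds,
	 *			       const u8 *in, u8 *out);
	 *
	 * rk points to the expanded key schedule and rounds is the number of
	 * rounds for the key size in use; in and out are 16-byte blocks.
	 */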