/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	add	\tmp, \rk, w\tmp, sxtw #4
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm

	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm

	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm

	.macro	aes_ccm_do_crypt, enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	cbz	x2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail
	ld1	{v2.16b}, [x1], #16		/* load next input block */

	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */

	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b

CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final

	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm

SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4		/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
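	/*
	 * Summary of the register assignments used by aes_ccm_do_crypt and
	 * ce_aes_ccm_crypt_tail, derived from the C prototypes below and the
	 * code above (the names follow the prototypes):
	 *
	 *   x0 : out      - output buffer
	 *   x1 : in       - input buffer
	 *   w2 : cbytes   - byte count; negative in the tail path, where it
	 *                   holds (bytes remaining in the final block - 16)
	 *   x3 : rk       - AES round keys
	 *   w4 : rounds   - number of AES rounds (10, 12 or 14)
	 *   x5 : mac      - CBC-MAC state
	 *   x6 : ctr      - CTR-mode counter block
	 *   x7 : final_iv - IV for encrypting the final MAC; if NULL, the MAC
	 *                   is stored without the final encryption
	 *
	 * v22 serves as the BIF select mask that distinguishes encryption
	 * from decryption: all-ones keeps the input block as the plaintext
	 * fed into the MAC (encrypt), all-zeroes selects the just-decrypted
	 * output instead (decrypt).
	 */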
	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)

	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff
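	/*
	 * How the table above is used by ce_aes_ccm_crypt_tail: the tail code
	 * indexes this 46-byte table at .Lpermute + w2 and .Lpermute - w2,
	 * with w2 in [-15, -1]. The 0xff entries on either side are
	 * out-of-range indices, which TBL turns into zero bytes and TBX
	 * leaves unmodified. The resulting permute vectors shift the
	 * keystream to the end of the register, splice the tail of the
	 * previous output block into the lanes beyond the message, and move
	 * the partial plaintext, zero-padded, to the start of the register
	 * before it is folded into the MAC.
	 */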