1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
From e77020396508fc086d7a4d6137388b116e4a662f Mon Sep 17 00:00:00 2001
From: Jerome Forissier <jerome.forissier@linaro.org>
Date: Tue, 12 Jun 2018 08:40:03 +0200
Subject: [PATCH] core: crypto: arm64 ce: update AES CBC routines

Update the AArch64 Crypto Extension accelerated CBC encryption/decryption
routines to the latest upstream implementation in the Linux kernel
(v4.17-rc7).

Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
Tested-by: Jerome Forissier <jerome.forissier@linaro.org> (HiKey960)
CC: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: https://github.com/OP-TEE/optee_os/issues/2355
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
---
core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c | 8 +--
.../src/ciphers/aes_modes_armv8a_ce_a64.S | 70 ++++++++++++----------
2 files changed, 41 insertions(+), 37 deletions(-)
diff --git a/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c b/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c
index cd99e45..873435e 100644
--- a/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c
+++ b/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c
@@ -59,9 +59,9 @@ void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
int blocks, int first);
void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- int blocks, u8 iv[], int first);
+ int blocks, u8 iv[]);
void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- int blocks, u8 iv[], int first);
+ int blocks, u8 iv[]);
void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
int blocks, u8 ctr[], int first);
void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
@@ -250,7 +250,7 @@ static int aes_cbc_encrypt_nblocks(const unsigned char *pt, unsigned char *ct,
rk = (u8 *)skey->rijndael.eK;
tomcrypt_arm_neon_enable(&state);
- ce_aes_cbc_encrypt(ct, pt, rk, Nr, blocks, IV, 1);
+ ce_aes_cbc_encrypt(ct, pt, rk, Nr, blocks, IV);
tomcrypt_arm_neon_disable(&state);
return CRYPT_OK;
@@ -273,7 +273,7 @@ static int aes_cbc_decrypt_nblocks(const unsigned char *ct, unsigned char *pt,
rk = (u8 *)skey->rijndael.dK;
tomcrypt_arm_neon_enable(&state);
- ce_aes_cbc_decrypt(pt, ct, rk, Nr, blocks, IV, 1);
+ ce_aes_cbc_decrypt(pt, ct, rk, Nr, blocks, IV);
tomcrypt_arm_neon_disable(&state);
return CRYPT_OK;
diff --git a/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S b/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S
index 04a4b06..58aa05e 100644
--- a/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S
+++ b/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S
@@ -329,55 +329,61 @@ ENDPROC(ce_aes_ecb_decrypt)
/*
* aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int blocks, u8 iv[], int first)
+ * int blocks, u8 iv[])
* aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int blocks, u8 iv[], int first)
+ * int blocks, u8 iv[])
*/
ENTRY(ce_aes_cbc_encrypt)
- cbz w6, .Lcbcencloop
-
- ld1 {v0.16b}, [x5] /* get iv */
- enc_prepare w3, x2, x5
+ ld1 {v4.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x6
+.Lcbcencloop4x:
+ subs w4, w4, #4
+ bmi .Lcbcenc1x
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
+ encrypt_block v0, w3, x2, x6, w7
+ eor v1.16b, v1.16b, v0.16b
+ encrypt_block v1, w3, x2, x6, w7
+ eor v2.16b, v2.16b, v1.16b
+ encrypt_block v2, w3, x2, x6, w7
+ eor v3.16b, v3.16b, v2.16b
+ encrypt_block v3, w3, x2, x6, w7
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v3.16b
+ b .Lcbcencloop4x
+.Lcbcenc1x:
+ adds w4, w4, #4
+ beq .Lcbcencout
.Lcbcencloop:
- ld1 {v1.16b}, [x1], #16 /* get next pt block */
- eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
- encrypt_block v0, w3, x2, x5, w6
- st1 {v0.16b}, [x0], #16
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
+ encrypt_block v4, w3, x2, x6, w7
+ st1 {v4.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcencloop
- st1 {v0.16b}, [x5] /* save iv for later */
+.Lcbcencout:
+ st1 {v4.16b}, [x5] /* return iv */
ret
ENDPROC(ce_aes_cbc_encrypt)
ENTRY(ce_aes_cbc_decrypt)
- FRAME_PUSH
- cbz w6, .LcbcdecloopNx
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
ld1 {v7.16b}, [x5] /* get iv */
- dec_prepare w3, x2, x5
+ dec_prepare w3, x2, x6
.LcbcdecloopNx:
-#if INTERLEAVE >= 2
- subs w4, w4, #INTERLEAVE
+ subs w4, w4, #4
bmi .Lcbcdec1x
-#if INTERLEAVE == 2
- ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
- mov v2.16b, v0.16b
- mov v3.16b, v1.16b
- do_decrypt_block2x
- eor v0.16b, v0.16b, v7.16b
- eor v1.16b, v1.16b, v2.16b
- mov v7.16b, v3.16b
- st1 {v0.16b-v1.16b}, [x0], #32
-#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
- do_decrypt_block4x
+ bl aes_decrypt_block4x
sub x1, x1, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
@@ -385,24 +391,22 @@ ENTRY(ce_aes_cbc_decrypt)
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
-#endif
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w4, w4, #INTERLEAVE
+ adds w4, w4, #4
beq .Lcbcdecout
-#endif
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w3, x2, x5, w6
+ decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
- st1 {v1.16b}, [x5] /* save iv for later */
- FRAME_POP
+ st1 {v7.16b}, [x5] /* return iv */
+ ldp x29, x30, [sp], #16
ret
ENDPROC(ce_aes_cbc_decrypt)
--
2.7.4
|