summary | refs | log | tree | commit | diff | stats
path: root/meta-rcar-gen3/recipes-bsp/optee/optee-os/0001-core-crypto-arm64-ce-update-AES-CBC-routines.patch
blob: 570752e658fea4d5ba848e6789face9340e2d6a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
From e77020396508fc086d7a4d6137388b116e4a662f Mon Sep 17 00:00:00 2001
From: Jerome Forissier <jerome.forissier@linaro.org>
Date: Tue, 12 Jun 2018 08:40:03 +0200
Subject: [PATCH] core: crypto: arm64 ce: update AES CBC routines

Update the AArch64 Crypto Extension accelerated CBC encryption/decryption
routines to the latest upstream implementation in the Linux kernel
(v4.17-rc7).

Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
Tested-by: Jerome Forissier <jerome.forissier@linaro.org> (HiKey960)
CC: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: https://github.com/OP-TEE/optee_os/issues/2355
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c   |  8 +--
 .../src/ciphers/aes_modes_armv8a_ce_a64.S          | 70 ++++++++++++----------
 2 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c b/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c
index cd99e45..873435e 100644
--- a/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c
+++ b/core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c
@@ -59,9 +59,9 @@ void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 			int blocks, int first);
 void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-			int blocks, u8 iv[], int first);
+			int blocks, u8 iv[]);
 void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-			int blocks, u8 iv[], int first);
+			int blocks, u8 iv[]);
 void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 			int blocks, u8 ctr[], int first);
 void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
@@ -250,7 +250,7 @@ static int aes_cbc_encrypt_nblocks(const unsigned char *pt, unsigned char *ct,
 	rk = (u8 *)skey->rijndael.eK;
 
 	tomcrypt_arm_neon_enable(&state);
-	ce_aes_cbc_encrypt(ct, pt, rk, Nr, blocks, IV, 1);
+	ce_aes_cbc_encrypt(ct, pt, rk, Nr, blocks, IV);
 	tomcrypt_arm_neon_disable(&state);
 
 	return CRYPT_OK;
@@ -273,7 +273,7 @@ static int aes_cbc_decrypt_nblocks(const unsigned char *ct, unsigned char *pt,
 	rk = (u8 *)skey->rijndael.dK;
 
 	tomcrypt_arm_neon_enable(&state);
-	ce_aes_cbc_decrypt(pt, ct, rk, Nr, blocks, IV, 1);
+	ce_aes_cbc_decrypt(pt, ct, rk, Nr, blocks, IV);
 	tomcrypt_arm_neon_disable(&state);
 
 	return CRYPT_OK;
diff --git a/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S b/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S
index 04a4b06..58aa05e 100644
--- a/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S
+++ b/core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S
@@ -329,55 +329,61 @@ ENDPROC(ce_aes_ecb_decrypt)
 
 	/*
 	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 */
 
 ENTRY(ce_aes_cbc_encrypt)
-	cbz		w6, .Lcbcencloop
-
-	ld1		{v0.16b}, [x5]			/* get iv */
-	enc_prepare	w3, x2, x5
+	ld1		{v4.16b}, [x5]			/* v4 = iv, the running chaining value */
+	enc_prepare	w3, x2, x6			/* x6 is the macro's 3rd operand; x5 stays the iv pointer */
 
+.Lcbcencloop4x:
+	subs		w4, w4, #4			/* w4 = blocks - 4 */
+	bmi		.Lcbcenc1x			/* negative: fewer than 4 blocks left */
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	eor		v0.16b, v0.16b, v4.16b		/* ..and xor first one with iv / previous output */
+	encrypt_block	v0, w3, x2, x6, w7
+	eor		v1.16b, v1.16b, v0.16b		/* v1 ^= v0 (previous output block) */
+	encrypt_block	v1, w3, x2, x6, w7
+	eor		v2.16b, v2.16b, v1.16b		/* v2 ^= v1 (previous output block) */
+	encrypt_block	v2, w3, x2, x6, w7
+	eor		v3.16b, v3.16b, v2.16b		/* v3 ^= v2 (previous output block) */
+	encrypt_block	v3, w3, x2, x6, w7
+	st1		{v0.16b-v3.16b}, [x0], #64	/* store 4 output blocks */
+	mov		v4.16b, v3.16b			/* last output block is the next chaining value */
+	b		.Lcbcencloop4x
+.Lcbcenc1x:
+	adds		w4, w4, #4			/* undo the -4 bias: w4 = blocks left */
+	beq		.Lcbcencout			/* zero left: skip the single-block loop */
 .Lcbcencloop:
-	ld1		{v1.16b}, [x1], #16		/* get next pt block */
-	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
-	encrypt_block	v0, w3, x2, x5, w6
-	st1		{v0.16b}, [x0], #16
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	eor		v4.16b, v4.16b, v0.16b		/* ..and xor it into the chaining value */
+	encrypt_block	v4, w3, x2, x6, w7
+	st1		{v4.16b}, [x0], #16		/* store output block; v4 carries into next round */
 	subs		w4, w4, #1
 	bne		.Lcbcencloop
-	st1		{v0.16b}, [x5]			/* save iv for later */
+.Lcbcencout:
+	st1		{v4.16b}, [x5]			/* write final chaining value back to iv[] */
 	ret
 ENDPROC(ce_aes_cbc_encrypt)
 
 
 ENTRY(ce_aes_cbc_decrypt)
-	FRAME_PUSH
-	cbz		w6, .LcbcdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	ld1		{v7.16b}, [x5]			/* get iv */
-	dec_prepare	w3, x2, x5
+	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lcbcdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	mov		v2.16b, v0.16b
-	mov		v3.16b, v1.16b
-	do_decrypt_block2x
-	eor		v0.16b, v0.16b, v7.16b
-	eor		v1.16b, v1.16b, v2.16b
-	mov		v7.16b, v3.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	mov		v4.16b, v0.16b
 	mov		v5.16b, v1.16b
 	mov		v6.16b, v2.16b
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	sub		x1, x1, #16
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v1.16b, v1.16b, v4.16b
@@ -385,24 +391,22 @@ ENTRY(ce_aes_cbc_decrypt)
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v3.16b, v3.16b, v6.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LcbcdecloopNx
 .Lcbcdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lcbcdecout
-#endif
 .Lcbcdecloop:
 	ld1		{v1.16b}, [x1], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
-	decrypt_block	v0, w3, x2, x5, w6
+	decrypt_block	v0, w3, x2, x6, w7
 	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
 	mov		v7.16b, v1.16b			/* ct is next iv */
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
 	bne		.Lcbcdecloop
 .Lcbcdecout:
-	st1		{v1.16b}, [x5]			/* save iv for later */
-	FRAME_POP
+	st1		{v7.16b}, [x5]			/* return iv */
+	ldp		x29, x30, [sp], #16
 	ret
 ENDPROC(ce_aes_cbc_decrypt)
 
-- 
2.7.4