2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 /* included by aes-ce.S and aes-neon.S */
17 * There are several ways to instantiate this code:
18 * - no interleave, all inline
19 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
20 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
21 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
22 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
24 * Macros imported by this code:
25 * - enc_prepare - setup NEON registers for encryption
26 * - dec_prepare - setup NEON registers for decryption
27 * - enc_switch_key - change to new key after having prepared for encryption
28 * - encrypt_block - encrypt a single block
29 * - decrypt_block - decrypt a single block
30 * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
31 * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
32 * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
33 * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
/*
 * Out-of-line interleave: the 2x/4x block helpers below are reached via
 * bl, so their callers must save/restore the frame record (x29/x30).
 * NOTE: in GNU AArch64 assembly ';' is a statement separator, NOT a
 * comment, so FRAME_PUSH deliberately expands to two instructions.
 */
#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
#define FRAME_POP ldp x29, x30, [sp],#16
/*
 * Subroutine bodies wrapping the imported 2x/4x macros.
 * Register convention (matches the mode wrappers below):
 *   w3 = number of rounds, x2 = round key array,
 *   x6/w7 = scratch; blocks are passed in v0-v1 (2x) or v0-v3 (4x).
 * NOTE(review): the AES_ENTRY/label and ret lines of these helpers are
 * not visible in this chunk -- confirm against the full file.
 */
encrypt_block2x v0, v1, w3, x2, x6, w7
ENDPROC(aes_encrypt_block2x)
decrypt_block2x v0, v1, w3, x2, x6, w7
ENDPROC(aes_decrypt_block2x)
encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
ENDPROC(aes_encrypt_block4x)
decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
ENDPROC(aes_decrypt_block4x)
/* any other INTERLEAVE value is unsupported */
#error INTERLEAVE should equal 2 or 4
/*
 * Out-of-line variants of the do_xxx_blockNx macros: each expands to a
 * single bl to the corresponding subroutine above.  bl clobbers x30,
 * which is why callers bracket these with FRAME_PUSH/FRAME_POP.
 * NOTE(review): the matching .endm directives are not visible in this
 * chunk -- confirm against the full file.
 */
.macro do_encrypt_block2x
bl aes_encrypt_block2x
.macro do_decrypt_block2x
bl aes_decrypt_block2x
.macro do_encrypt_block4x
bl aes_encrypt_block4x
.macro do_decrypt_block4x
bl aes_decrypt_block4x
/*
 * Inline variants of the do_xxx_blockNx macros: expand the imported
 * 2x/4x primitives directly at the call site (no bl, no frame needed).
 * Same register convention as the out-of-line helpers: w3 = rounds,
 * x2 = round keys, x6/w7 = scratch, data in v0-v1 / v0-v3.
 */
.macro do_encrypt_block2x
encrypt_block2x v0, v1, w3, x2, x6, w7
.macro do_decrypt_block2x
decrypt_block2x v0, v1, w3, x2, x6, w7
.macro do_encrypt_block4x
encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
.macro do_decrypt_block4x
decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
107 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
108 * int blocks, int first)
109 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
110 * int blocks, int first)
/*
 * ECB encryption.
 * x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count, w5 = 'first' flag (skip key setup when zero).
 */
113 AES_ENTRY(aes_ecb_encrypt)
/* key schedule already loaded on repeat calls */
115 cbz w5, .LecbencloopNx
117 enc_prepare w3, x2, x5
/* Nx path: consume INTERLEAVE blocks per iteration while w4 allows */
121 subs w4, w4, #INTERLEAVE
124 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
126 st1 {v0.16b-v1.16b}, [x0], #32
128 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
130 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the final subtraction; remaining blocks done one at a time */
134 adds w4, w4, #INTERLEAVE
138 ld1 {v0.16b}, [x1], #16 /* get next pt block */
139 encrypt_block v0, w3, x2, x5, w6
140 st1 {v0.16b}, [x0], #16
146 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption; mirror image of aes_ecb_encrypt above.
 * x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count, w5 = 'first' flag (skip key setup when zero).
 */
149 AES_ENTRY(aes_ecb_decrypt)
151 cbz w5, .LecbdecloopNx
153 dec_prepare w3, x2, x5
/* Nx path: consume INTERLEAVE blocks per iteration while w4 allows */
157 subs w4, w4, #INTERLEAVE
160 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
162 st1 {v0.16b-v1.16b}, [x0], #32
164 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
166 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the final subtraction; remaining blocks done one at a time */
170 adds w4, w4, #INTERLEAVE
174 ld1 {v0.16b}, [x1], #16 /* get next ct block */
175 decrypt_block v0, w3, x2, x5, w6
176 st1 {v0.16b}, [x0], #16
182 AES_ENDPROC(aes_ecb_decrypt)
186 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
187 * int blocks, u8 iv[], int first)
188 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
189 * int blocks, u8 iv[], int first)
/*
 * CBC encryption.  Inherently serial (each ciphertext block is the IV
 * for the next), so there is no interleaved Nx path here.
 * x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count (consumed in the loop, decrement not visible in
 * this chunk), x5 = iv, w6 = 'first' flag.
 */
192 AES_ENTRY(aes_cbc_encrypt)
195 ld1 {v0.16b}, [x5] /* get iv */
196 enc_prepare w3, x2, x6
/* v0 always holds the running IV / previous ciphertext block */
199 ld1 {v1.16b}, [x1], #16 /* get next pt block */
200 eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
201 encrypt_block v0, w3, x2, x6, w7
202 st1 {v0.16b}, [x0], #16
/* write back the last ciphertext block as the chained IV */
205 st1 {v0.16b}, [x5] /* return iv */
207 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption.  Unlike encryption this parallelizes: decrypt N
 * ciphertext blocks at once, then xor each with the preceding
 * ciphertext block (or the IV).  v7 carries the IV / previous
 * ciphertext block across loop iterations.
 * x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count, x5 = iv, w6 = 'first' flag.
 */
210 AES_ENTRY(aes_cbc_decrypt)
212 cbz w6, .LcbcdecloopNx
214 ld1 {v7.16b}, [x5] /* get iv */
215 dec_prepare w3, x2, x6
219 subs w4, w4, #INTERLEAVE
/* 2x path: v7 = chaining value, v2 = first ct block of this pair
 * (the copies that set up v2 are not visible in this chunk) */
222 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
226 eor v0.16b, v0.16b, v7.16b
227 eor v1.16b, v1.16b, v2.16b
229 st1 {v0.16b-v1.16b}, [x0], #32
/* 4x path: v4-v6 hold saved ct blocks 1-3 for the chaining xors */
231 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
237 eor v0.16b, v0.16b, v7.16b
238 eor v1.16b, v1.16b, v4.16b
/* re-read the last ct block of this group: it chains into the next */
239 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
240 eor v2.16b, v2.16b, v5.16b
241 eor v3.16b, v3.16b, v6.16b
242 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the final subtraction; remainder handled one block at a time */
246 adds w4, w4, #INTERLEAVE
250 ld1 {v1.16b}, [x1], #16 /* get next ct block */
251 mov v0.16b, v1.16b /* ...and copy to v0 */
252 decrypt_block v0, w3, x2, x6, w7
253 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
254 mov v7.16b, v1.16b /* ct is next iv */
255 st1 {v0.16b}, [x0], #16
260 st1 {v7.16b}, [x5] /* return iv */
262 AES_ENDPROC(aes_cbc_decrypt)
266 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
267 * int blocks, u8 ctr[], int first)
/*
 * CTR encryption.  v4 holds the big-endian counter block; x8 caches
 * the byte-swapped (CPU-order) low 64 bits so it can be incremented
 * with plain adds.  A carry out of the low 64 bits branches to a
 * slow path that propagates into the high word (.Lctrcarry).
 * x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = block count (negative final value means a trailing half
 * block), x5 = ctr, w6 = 'first' flag.
 */
270 AES_ENTRY(aes_ctr_encrypt)
272 cbz w6, .Lctrnotfirst /* 1st time around? */
273 enc_prepare w3, x2, x6
277 umov x8, v4.d[1] /* keep swabbed ctr in reg */
/* cheap pre-check: will the low 32 bits overflow during this call? */
280 cmn w8, w4 /* 32 bit overflow? */
283 subs w4, w4, #INTERLEAVE
/* 2x path: keystream in v0-v1 (generation not visible in chunk) */
294 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
296 eor v0.16b, v0.16b, v2.16b
297 eor v1.16b, v1.16b, v3.16b
298 st1 {v0.16b-v1.16b}, [x0], #32
/* 4x path: build ctr, ctr+1, ctr+2, ctr+3 by vector addition */
300 ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
303 add v7.4s, v7.4s, v8.4s
311 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
313 eor v0.16b, v5.16b, v0.16b
314 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
315 eor v1.16b, v6.16b, v1.16b
316 eor v2.16b, v7.16b, v2.16b
317 eor v3.16b, v5.16b, v3.16b
318 st1 {v0.16b-v3.16b}, [x0], #64
/* account for the INTERLEAVE counter values just consumed */
319 add x8, x8, #INTERLEAVE
/* undo the final subtraction; remainder handled one block at a time */
326 adds w4, w4, #INTERLEAVE
331 encrypt_block v0, w3, x2, x6, w7
333 adds x8, x8, #1 /* increment BE ctr */
336 bcs .Lctrcarry /* overflow? */
340 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
341 ld1 {v3.16b}, [x1], #16
342 eor v3.16b, v0.16b, v3.16b
343 st1 {v3.16b}, [x0], #16
347 st1 {v4.16b}, [x5] /* return next CTR value */
/* half-block tail: xor only the low 8 bytes of keystream */
353 eor v3.8b, v0.8b, v3.8b
/* carry path: propagate the increment into the high counter word */
359 umov x7, v4.d[0] /* load upper word of ctr */
360 rev x7, x7 /* ... to handle the carry */
365 AES_ENDPROC(aes_ctr_encrypt)
370 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
371 * int blocks, u8 const rk2[], u8 iv[], int first)
372 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
373 * int blocks, u8 const rk2[], u8 iv[], int first)
/*
 * Multiply the 128-bit XTS tweak \in by x in GF(2^128) with the
 * standard reduction polynomial (low byte 0x87), producing \out.
 * Works on both 64-bit lanes at once:
 *   - sshr broadcasts each lane's sign bit (its MSB) to all 64 bits
 *   - and with \const turns that into the per-lane carry pattern
 *   - add doubles each lane (shift left by one, carries lost)
 *   - ext swaps the two lanes so each lane's carry lands in the
 *     OTHER lane: bit 63 carries +1 into the high lane, bit 127
 *     wraps as 0x87 into the low byte
 *   - eor applies both carries
 * \tmp is clobbered.
 */
.macro next_tweak, out, in, const, tmp
sshr \tmp\().2d, \in\().2d, #63
and \tmp\().16b, \tmp\().16b, \const\().16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
/* carry constants in memory order matching CPU endianness */
CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 )
/*
 * XTS encryption.  v4 holds the current tweak; the first tweak is
 * the IV encrypted with the secondary key rk2 (x5), after which the
 * data key rk1 (x2) is loaded.  v7 holds the GF multiplication
 * constant, v8 is scratch for next_tweak.
 * x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = block count,
 * x5 = rk2, x6 = iv, w7 = 'first' flag.
 */
388 AES_ENTRY(aes_xts_encrypt)
390 cbz w7, .LxtsencloopNx
/* derive first tweak: ECB-encrypt the IV under rk2, then switch keys */
393 enc_prepare w3, x5, x6
394 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
395 enc_switch_key w3, x2, x6
401 next_tweak v4, v4, v7, v8
404 subs w4, w4, #INTERLEAVE
/* 2x path: tweaks v4/v5; xor before AND after encryption
 * (the do_encrypt_block2x expansion between the two eor pairs is not
 * visible in this chunk) */
407 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
408 next_tweak v5, v4, v7, v8
409 eor v0.16b, v0.16b, v4.16b
410 eor v1.16b, v1.16b, v5.16b
412 eor v0.16b, v0.16b, v4.16b
413 eor v1.16b, v1.16b, v5.16b
414 st1 {v0.16b-v1.16b}, [x0], #32
415 cbz w4, .LxtsencoutNx
416 next_tweak v4, v5, v7, v8
/* 4x path: tweaks v4-v7; note v7 doubles as the GF constant, so it is
 * computed last and the constant must be reloaded afterwards */
422 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
423 next_tweak v5, v4, v7, v8
424 eor v0.16b, v0.16b, v4.16b
425 next_tweak v6, v5, v7, v8
426 eor v1.16b, v1.16b, v5.16b
427 eor v2.16b, v2.16b, v6.16b
428 next_tweak v7, v6, v7, v8
429 eor v3.16b, v3.16b, v7.16b
431 eor v3.16b, v3.16b, v7.16b
432 eor v0.16b, v0.16b, v4.16b
433 eor v1.16b, v1.16b, v5.16b
434 eor v2.16b, v2.16b, v6.16b
435 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the final subtraction; remainder handled one block at a time */
441 adds w4, w4, #INTERLEAVE
445 ld1 {v1.16b}, [x1], #16
446 eor v0.16b, v1.16b, v4.16b
447 encrypt_block v0, w3, x2, x6, w7
448 eor v0.16b, v0.16b, v4.16b
449 st1 {v0.16b}, [x0], #16
452 next_tweak v4, v4, v7, v8
457 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption; mirror image of aes_xts_encrypt above.  The first
 * tweak is still produced with ENCRYPTION under rk2 (XTS always
 * encrypts the IV), then the decryption key schedule for rk1 is
 * loaded.  v4 = current tweak, v7 = GF constant, v8 = scratch.
 * x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = block count,
 * x5 = rk2, x6 = iv, w7 = 'first' flag.
 */
460 AES_ENTRY(aes_xts_decrypt)
462 cbz w7, .LxtsdecloopNx
/* derive first tweak with rk2, then load rk1 for decryption */
465 enc_prepare w3, x5, x6
466 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
467 dec_prepare w3, x2, x6
473 next_tweak v4, v4, v7, v8
476 subs w4, w4, #INTERLEAVE
/* 2x path: tweaks v4/v5; xor before AND after decryption
 * (the do_decrypt_block2x expansion between the two eor pairs is not
 * visible in this chunk) */
479 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
480 next_tweak v5, v4, v7, v8
481 eor v0.16b, v0.16b, v4.16b
482 eor v1.16b, v1.16b, v5.16b
484 eor v0.16b, v0.16b, v4.16b
485 eor v1.16b, v1.16b, v5.16b
486 st1 {v0.16b-v1.16b}, [x0], #32
487 cbz w4, .LxtsdecoutNx
488 next_tweak v4, v5, v7, v8
/* 4x path: tweaks v4-v7; v7 doubles as the GF constant, so it is
 * computed last and the constant must be reloaded afterwards */
494 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
495 next_tweak v5, v4, v7, v8
496 eor v0.16b, v0.16b, v4.16b
497 next_tweak v6, v5, v7, v8
498 eor v1.16b, v1.16b, v5.16b
499 eor v2.16b, v2.16b, v6.16b
500 next_tweak v7, v6, v7, v8
501 eor v3.16b, v3.16b, v7.16b
503 eor v3.16b, v3.16b, v7.16b
504 eor v0.16b, v0.16b, v4.16b
505 eor v1.16b, v1.16b, v5.16b
506 eor v2.16b, v2.16b, v6.16b
507 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the final subtraction; remainder handled one block at a time */
513 adds w4, w4, #INTERLEAVE
517 ld1 {v1.16b}, [x1], #16
518 eor v0.16b, v1.16b, v4.16b
519 decrypt_block v0, w3, x2, x6, w7
520 eor v0.16b, v0.16b, v4.16b
521 st1 {v0.16b}, [x0], #16
524 next_tweak v4, v4, v7, v8
529 AES_ENDPROC(aes_xts_decrypt)