2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 /* included by aes-ce.S and aes-neon.S */
17 * There are several ways to instantiate this code:
18 * - no interleave, all inline
19 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
20 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
21 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
22 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
24 * Macros imported by this code:
25 * - enc_prepare - setup NEON registers for encryption
26 * - dec_prepare - setup NEON registers for decryption
27 * - enc_switch_key - change to new key after having prepared for encryption
28 * - encrypt_block - encrypt a single block
29 * - decrypt_block - decrypt a single block
30 * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
31 * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
32 * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
33 * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
/*
 * Out-of-line interleave: the 2x/4x block helpers below are real
 * functions reached via bl, so they must preserve x29/x30 around the
 * call.  Note that ';' is a statement separator (not a comment) in
 * AArch64 gas, so FRAME_PUSH expands to two instructions.
 */
36 #if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
37 #define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
38 #define FRAME_POP ldp x29, x30, [sp],#16
/*
 * Out-of-line 2x/4x block primitives, invoked via the do_* macros
 * below.  Blocks are passed in v0..v1 (2x) or v0..v3 (4x); w3 holds
 * the round count and x2 the round-key pointer (per the callers).
 * NOTE(review): the AES_ENTRY()/FRAME_PUSH/FRAME_POP/ret lines of each
 * helper are elided in this excerpt -- verify against the full file.
 */
43 encrypt_block2x v0, v1, w3, x2, x6, w7
45 ENDPROC(aes_encrypt_block2x)
48 decrypt_block2x v0, v1, w3, x2, x6, w7
50 ENDPROC(aes_decrypt_block2x)
55 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
57 ENDPROC(aes_encrypt_block4x)
60 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
62 ENDPROC(aes_decrypt_block4x)
/* any other INTERLEAVE value is a build error */
65 #error INTERLEAVE should equal 2 or 4
/*
 * Out-of-line dispatch: each do_* macro is a single bl to the
 * corresponding aes_*_block{2,4}x helper above.
 * NOTE(review): the .endm directives are elided in this excerpt.
 */
68 .macro do_encrypt_block2x
69 bl aes_encrypt_block2x
72 .macro do_decrypt_block2x
73 bl aes_decrypt_block2x
76 .macro do_encrypt_block4x
77 bl aes_encrypt_block4x
80 .macro do_decrypt_block4x
81 bl aes_decrypt_block4x
/*
 * Inline dispatch: with INTERLEAVE_INLINE (or no out-of-line helpers)
 * the 2x/4x primitives are expanded directly at each call site, trading
 * code size for the avoided bl/ret.
 * NOTE(review): the .endm directives are elided in this excerpt.
 */
88 .macro do_encrypt_block2x
89 encrypt_block2x v0, v1, w3, x2, x6, w7
92 .macro do_decrypt_block2x
93 decrypt_block2x v0, v1, w3, x2, x6, w7
96 .macro do_encrypt_block4x
97 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
100 .macro do_decrypt_block4x
101 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
107 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
108 * int blocks, int first)
109 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
110 * int blocks, int first)
/*
 * ECB encryption: x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, w5=first
 * (per the prototype comment above).  No chaining, so blocks can be
 * processed INTERLEAVE at a time independently.
 */
113 AES_ENTRY(aes_ecb_encrypt)
/* w5 == 0 means the key schedule is already set up from a prior call */
115 cbz w5, .LecbencloopNx
117 enc_prepare w3, x2, x5
/* Nx loop: consume INTERLEAVE blocks per iteration while w4 allows */
121 subs w4, w4, #INTERLEAVE
124 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
126 st1 {v0.16b-v1.16b}, [x0], #32
128 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
130 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the last subtraction, then handle any remainder one at a time */
134 adds w4, w4, #INTERLEAVE
138 ld1 {v0.16b}, [x1], #16 /* get next pt block */
139 encrypt_block v0, w3, x2, x5, w6
140 st1 {v0.16b}, [x0], #16
146 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption: mirror image of aes_ecb_encrypt above, using the
 * decryption key schedule (dec_prepare / decrypt_block).
 */
149 AES_ENTRY(aes_ecb_decrypt)
/* w5 == 0 means the key schedule is already set up from a prior call */
151 cbz w5, .LecbdecloopNx
153 dec_prepare w3, x2, x5
/* Nx loop: consume INTERLEAVE blocks per iteration while w4 allows */
157 subs w4, w4, #INTERLEAVE
160 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
162 st1 {v0.16b-v1.16b}, [x0], #32
164 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
166 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the last subtraction, then handle any remainder one at a time */
170 adds w4, w4, #INTERLEAVE
174 ld1 {v0.16b}, [x1], #16 /* get next ct block */
175 decrypt_block v0, w3, x2, x5, w6
176 st1 {v0.16b}, [x0], #16
182 AES_ENDPROC(aes_ecb_decrypt)
186 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
187 * int blocks, u8 iv[], int first)
188 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
189 * int blocks, u8 iv[], int first)
/*
 * CBC encryption.  Each ciphertext block feeds into the next, so this
 * direction is inherently serial and has no interleaved fast path.
 * v0 carries the iv / previous ciphertext across loop iterations.
 */
192 AES_ENTRY(aes_cbc_encrypt)
/* NOTE(review): the 'first'-call guard lines are elided in this excerpt */
195 ld1 {v0.16b}, [x5] /* get iv */
196 enc_prepare w3, x2, x5
199 ld1 {v1.16b}, [x1], #16 /* get next pt block */
200 eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
201 encrypt_block v0, w3, x2, x5, w6
/* v0 is now ciphertext, which doubles as the iv for the next block */
202 st1 {v0.16b}, [x0], #16
206 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption.  Unlike encryption, decryption of each block is
 * independent (only the final xor chains), so the Nx interleaved path
 * applies.  v7 holds the iv / previous ciphertext block throughout.
 */
209 AES_ENTRY(aes_cbc_decrypt)
211 cbz w6, .LcbcdecloopNx
213 ld1 {v7.16b}, [x5] /* get iv */
214 dec_prepare w3, x2, x5
218 subs w4, w4, #INTERLEAVE
221 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
/* xor decrypted blocks with prev ct: v7 = iv/prev ct, v2 = saved ct[0]
 * NOTE(review): the saving of ct copies into v2 (2x) / v4-v6 (4x) is
 * elided in this excerpt -- verify against the full file. */
225 eor v0.16b, v0.16b, v7.16b
226 eor v1.16b, v1.16b, v2.16b
228 st1 {v0.16b-v1.16b}, [x0], #32
230 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
236 eor v0.16b, v0.16b, v7.16b
237 eor v1.16b, v1.16b, v4.16b
/* last ct block becomes the next iv; presumably x1 was rewound before
 * this reload (elided lines) -- TODO confirm */
238 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
239 eor v2.16b, v2.16b, v5.16b
240 eor v3.16b, v3.16b, v6.16b
241 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the last subtraction, then handle any remainder one at a time */
245 adds w4, w4, #INTERLEAVE
249 ld1 {v1.16b}, [x1], #16 /* get next ct block */
250 mov v0.16b, v1.16b /* ...and copy to v0 */
251 decrypt_block v0, w3, x2, x5, w6
252 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
253 mov v7.16b, v1.16b /* ct is next iv */
254 st1 {v0.16b}, [x0], #16
260 AES_ENDPROC(aes_cbc_decrypt)
264 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
265 * int blocks, u8 ctr[], int first)
/*
 * CTR mode: x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5 doubles as
 * the ctr pointer (first call) and then as the byte-swapped ("swabbed",
 * i.e. host-order) low 64 bits of the big-endian counter.  v4 holds the
 * counter block; w6 = first flag.  A negative w4 encodes a trailing
 * half block (see .Lctrhalfblock).
 */
268 AES_ENTRY(aes_ctr_encrypt)
270 cbnz w6, .Lctrfirst /* 1st time around? */
271 umov x5, v4.d[1] /* keep swabbed ctr in reg */
/* if the low 32 bits would wrap while emitting w4 blocks, take the
 * slow path that propagates the carry into the upper half */
274 cmn w5, w4 /* 32 bit overflow? */
276 add x5, x5, #1 /* increment BE ctr */
282 enc_prepare w3, x2, x6
284 umov x5, v4.d[1] /* keep swabbed ctr in reg */
287 cmn w5, w4 /* 32 bit overflow? */
290 subs w4, w4, #INTERLEAVE
301 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
302 eor v0.16b, v0.16b, v2.16b
/* keystream (v0,v1) was produced from consecutive counter values by
 * elided lines above -- xor it with the input to encrypt */
304 eor v1.16b, v1.16b, v3.16b
305 st1 {v0.16b-v1.16b}, [x0], #32
/* 128-bit literal = {1,2,3,0} across 32-bit lanes: per-lane counter
 * increments for the 4x path */
307 ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
310 add v7.4s, v7.4s, v8.4s
318 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
320 eor v0.16b, v5.16b, v0.16b
321 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
322 eor v1.16b, v6.16b, v1.16b
323 eor v2.16b, v7.16b, v2.16b
324 eor v3.16b, v5.16b, v3.16b
325 st1 {v0.16b-v3.16b}, [x0], #64
326 add x5, x5, #INTERLEAVE
/* undo the last subtraction, then handle any remainder one at a time */
339 adds w4, w4, #INTERLEAVE
344 encrypt_block v0, w3, x2, x6, w7
346 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
347 ld1 {v3.16b}, [x1], #16
348 eor v3.16b, v0.16b, v3.16b
349 st1 {v3.16b}, [x0], #16
352 adds x5, x5, #1 /* increment BE ctr */
355 bcc .Lctrloop /* no overflow? */
/* carry out of the low 64 bits: fix up the upper counter word */
356 umov x7, v4.d[0] /* load upper word of ctr */
357 rev x7, x7 /* ... to handle the carry */
/* half-block tail: only the low 8 bytes of keystream are used */
364 eor v3.8b, v0.8b, v3.8b
369 AES_ENDPROC(aes_ctr_encrypt)
374 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
375 * int blocks, u8 const rk2[], u8 iv[], int first)
376 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
377 * int blocks, u8 const rk2[], u8 iv[], int first)
/*
 * next_tweak: multiply the 128-bit XTS tweak by x in GF(2^128) with
 * reduction polynomial x^128 + x^7 + x^2 + x + 1.
 *   sshr  - replicate each 64-bit lane's MSB into an all-ones/zero mask
 *   and   - select {1, 0x87} (from .Lxts_mul_x) where a carry occurs
 *   add   - double each lane (shift left by 1, dropping the carry-out)
 *   ext   - swap the mask halves so each lane's carry targets the other
 *   eor   - fold in: low-lane carry -> +1 in high lane, high-lane
 *           carry -> xor 0x87 into low lane (the reduction)
 * NOTE(review): the .endm and the .Lxts_mul_x label line are elided in
 * this excerpt.
 */
380 .macro next_tweak, out, in, const, tmp
381 sshr \tmp\().2d, \in\().2d, #63
382 and \tmp\().16b, \tmp\().16b, \const\().16b
383 add \out\().2d, \in\().2d, \in\().2d
384 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
385 eor \out\().16b, \out\().16b, \tmp\().16b
/* same logical constant in either endianness: low quad 1, high quad 0x87 */
389 CPU_LE( .quad 1, 0x87 )
390 CPU_BE( .quad 0x87, 1 )
/*
 * XTS encryption (XEX construction): each block is xor'ed with its
 * tweak both before and after the block cipher.  v4..v7 hold the
 * running tweaks; v7/v8 are also used as the mul-x constant/scratch
 * for next_tweak (v7 is presumably reloaded at the loop top -- the
 * load is elided in this excerpt, TODO confirm).
 */
392 AES_ENTRY(aes_xts_encrypt)
394 cbz w7, .LxtsencloopNx /* skip tweak generation unless 1st call */
/* first tweak = AES-enc(iv) under the second key rk2 (x5) */
397 enc_prepare w3, x5, x6
398 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
399 enc_switch_key w3, x2, x6 /* switch to the data key rk1 (x2) */
405 next_tweak v4, v4, v7, v8
408 subs w4, w4, #INTERLEAVE
411 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
412 next_tweak v5, v4, v7, v8
413 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
414 eor v1.16b, v1.16b, v5.16b
/* post-whiten after the (elided) do_encrypt_block2x */
416 eor v0.16b, v0.16b, v4.16b
417 eor v1.16b, v1.16b, v5.16b
418 st1 {v0.16b-v1.16b}, [x0], #32
419 cbz w4, .LxtsencoutNx
420 next_tweak v4, v5, v7, v8
426 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
427 next_tweak v5, v4, v7, v8
428 eor v0.16b, v0.16b, v4.16b
429 next_tweak v6, v5, v7, v8
430 eor v1.16b, v1.16b, v5.16b
431 eor v2.16b, v2.16b, v6.16b
432 next_tweak v7, v6, v7, v8 /* clobbers the mul-x const in v7 */
433 eor v3.16b, v3.16b, v7.16b
/* post-whiten after the (elided) do_encrypt_block4x */
435 eor v3.16b, v3.16b, v7.16b
436 eor v0.16b, v0.16b, v4.16b
437 eor v1.16b, v1.16b, v5.16b
438 eor v2.16b, v2.16b, v6.16b
439 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the last subtraction, then handle any remainder one at a time */
445 adds w4, w4, #INTERLEAVE
449 ld1 {v1.16b}, [x1], #16
450 eor v0.16b, v1.16b, v4.16b /* pre-whiten */
451 encrypt_block v0, w3, x2, x6, w7
452 eor v0.16b, v0.16b, v4.16b /* post-whiten */
453 st1 {v0.16b}, [x0], #16
456 next_tweak v4, v4, v7, v8
461 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption: identical tweak schedule to aes_xts_encrypt -- the
 * tweak is ALWAYS generated by encrypting the iv with rk2, even when
 * decrypting -- with the data blocks run through the decryption
 * direction of rk1.
 */
464 AES_ENTRY(aes_xts_decrypt)
466 cbz w7, .LxtsdecloopNx /* skip tweak generation unless 1st call */
/* first tweak = AES-enc(iv) under the second key rk2 (x5) */
469 enc_prepare w3, x5, x6
470 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
471 dec_prepare w3, x2, x6 /* then decryption schedule for rk1 */
477 next_tweak v4, v4, v7, v8
480 subs w4, w4, #INTERLEAVE
483 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
484 next_tweak v5, v4, v7, v8
485 eor v0.16b, v0.16b, v4.16b /* pre-whiten with tweaks */
486 eor v1.16b, v1.16b, v5.16b
/* post-whiten after the (elided) do_decrypt_block2x */
488 eor v0.16b, v0.16b, v4.16b
489 eor v1.16b, v1.16b, v5.16b
490 st1 {v0.16b-v1.16b}, [x0], #32
491 cbz w4, .LxtsdecoutNx
492 next_tweak v4, v5, v7, v8
498 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
499 next_tweak v5, v4, v7, v8
500 eor v0.16b, v0.16b, v4.16b
501 next_tweak v6, v5, v7, v8
502 eor v1.16b, v1.16b, v5.16b
503 eor v2.16b, v2.16b, v6.16b
504 next_tweak v7, v6, v7, v8 /* clobbers the mul-x const in v7 */
505 eor v3.16b, v3.16b, v7.16b
/* post-whiten after the (elided) do_decrypt_block4x */
507 eor v3.16b, v3.16b, v7.16b
508 eor v0.16b, v0.16b, v4.16b
509 eor v1.16b, v1.16b, v5.16b
510 eor v2.16b, v2.16b, v6.16b
511 st1 {v0.16b-v3.16b}, [x0], #64
/* undo the last subtraction, then handle any remainder one at a time */
517 adds w4, w4, #INTERLEAVE
521 ld1 {v1.16b}, [x1], #16
522 eor v0.16b, v1.16b, v4.16b /* pre-whiten */
523 decrypt_block v0, w3, x2, x6, w7
524 eor v0.16b, v0.16b, v4.16b /* post-whiten */
525 st1 {v0.16b}, [x0], #16
528 next_tweak v4, v4, v7, v8
533 AES_ENDPROC(aes_xts_decrypt)