2 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
4 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
8 * Copyright (c) 2009 Intel Corp.
9 * Author: Huang Ying <ying.huang@intel.com>
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License version 2 as published
16 * by the Free Software Foundation.
19 #include <linux/linkage.h>
20 #include <asm/assembler.h>
34 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
35 * struct ghash_key const *k, const char *head)
/*
 * pmull_ghash_update: fold one input block into the running value DATA
 * and multiply the result by SHASH in GF(2^128) using PMULL/PMULL2.
 *
 * Visible data flow:
 *   IN1   <- 16 bytes loaded from [x2], x2 post-incremented by 16
 *   DATA  <- DATA ^ IN1 (after swapping halves / byte reversal)
 *   <T1:DATA> <- 256-bit carry-less product of DATA and SHASH
 *   T1, DATA  <- combined by the two reduction phases
 *
 * NOTE(review): the register aliases (VZR, IN1, DATA, SHASH, T1, T2, T3)
 * are not defined in this view, and neither are the loads that initialise
 * SHASH and DATA, the branches that target the numeric labels 0: and 1:,
 * the final store, or the return.  The numbers embedded at the start of
 * each line skip values (e.g. 40 -> 42 -> 47, 73 -> 75 -> 77, 89 -> 95), so
 * instructions are presumably missing here -- confirm against the full
 * file before relying on this listing.
 *
 * Presumably x2 is the `src` argument of the prototype comment (third
 * argument under AAPCS64) -- TODO confirm.
 */
37 ENTRY(pmull_ghash_update)
/* x ^ x == 0: clear VZR so it can serve as an all-zero vector operand */
40 eor VZR.16b, VZR.16b, VZR.16b
42 /* do the head block first, if supplied */
/* load the next 16-byte block and advance x2 past it */
47 0: ld1 {IN1.2d}, [x2], #16
/* ext by 8 bytes with identical sources swaps the two 64-bit halves */
49 1: ext IN1.16b, IN1.16b, IN1.16b, #8
/*
 * rev64 reverses the byte order inside each 64-bit lane.  CPU_LE() is not
 * defined in this view; presumably it emits its argument only on
 * little-endian builds -- verify in <asm/assembler.h>.
 */
50 CPU_LE( rev64 IN1.16b, IN1.16b )
/* fold the input block into the running value */
51 eor DATA.16b, DATA.16b, IN1.16b
53 /* multiply DATA by SHASH in GF(2^128) */
/*
 * Karatsuba set-up: after the ext/eor pairs each 64-bit half of T2 holds
 * (high ^ low) of DATA, and each half of T3 holds (high ^ low) of SHASH.
 */
54 ext T2.16b, DATA.16b, DATA.16b, #8
55 ext T3.16b, SHASH.16b, SHASH.16b, #8
56 eor T2.16b, T2.16b, DATA.16b
57 eor T3.16b, T3.16b, SHASH.16b
/* three 64x64 -> 128 bit carry-less multiplies instead of four */
59 pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
60 pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
61 pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
/* removing the high and low products leaves the middle term in T2 */
62 eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
63 eor T2.16b, T2.16b, DATA.16b
/*
 * Add the middle term at a 64-bit offset: T3 = T2 shifted left by 64 bits
 * (low half zero), T2 = T2 shifted right by 64 bits (high half zero).
 * The former goes into the low 128 bits (DATA), the latter into the high
 * 128 bits (T1).
 */
65 ext T3.16b, VZR.16b, T2.16b, #8
66 ext T2.16b, T2.16b, VZR.16b, #8
67 eor DATA.16b, DATA.16b, T3.16b
68 eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
69 // carry-less multiplication
71 /* first phase of the reduction */
/*
 * NOTE(review): as listed, the two eor instructions below XOR DATA into T3
 * twice in a row, which cancels out and leaves T3 = DATA << 1 per lane.
 * The embedded line numbers jump 73 -> 75 -> 77, so shift instructions
 * between them are presumably missing from this view -- confirm against
 * the full file.
 */
72 shl T3.2d, DATA.2d, #1
73 eor T3.16b, T3.16b, DATA.16b
75 eor T3.16b, T3.16b, DATA.16b
/*
 * Same 64-bit split as above: T2 = T3 shifted left by 64 bits is folded
 * into DATA, T3 shifted right by 64 bits is folded into T1.
 */
77 ext T2.16b, VZR.16b, T3.16b, #8
78 ext T3.16b, T3.16b, VZR.16b, #8
79 eor DATA.16b, DATA.16b, T2.16b
80 eor T1.16b, T1.16b, T3.16b
82 /* second phase of the reduction */
/*
 * NOTE(review): as listed, the two eor instructions below XOR DATA into T2
 * twice in a row, which cancels out and leaves T2 = DATA >> 5 per lane.
 * The embedded line numbers jump 84 -> 86 -> 88, so shift instructions
 * between them are presumably missing from this view -- confirm against
 * the full file.
 */
83 ushr T2.2d, DATA.2d, #5
84 eor T2.16b, T2.16b, DATA.16b
86 eor T2.16b, T2.16b, DATA.16b
/* fold the shifted term into T1, then T1 into DATA */
88 eor T1.16b, T1.16b, T2.16b
89 eor DATA.16b, DATA.16b, T1.16b
/*
 * NOTE(review): no loop-back branch, store of DATA, or ret is visible
 * before ENDPROC (embedded numbers jump 89 -> 95) -- presumably elided
 * from this view; confirm against the full file.
 */
95 ENDPROC(pmull_ghash_update)