From: Jussi Kivilinna Date: Mon, 8 Apr 2013 18:51:00 +0000 (+0300) Subject: crypto: x86/twofish-avx - use optimized XTS code X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=18be45270a80ab489d9402b63e1f103428f0afde;p=linux-beck.git crypto: x86/twofish-avx - use optimized XTS code Change twofish-avx to use the new XTS code, for smaller stack usage and small boost to performance. tcrypt results, with Intel i5-2450M: enc dec 16B 1.03x 1.02x 64B 0.91x 0.91x 256B 1.10x 1.09x 1024B 1.12x 1.11x 8192B 1.12x 1.11x Since XTS is practically always used with data blocks of size 512 bytes or more, I chose to not make use of twofish-3way for block sized smaller than 128 bytes. This causes slower result in tcrypt for 64 bytes. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 8d3e113b2c95..05058134c443 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -4,7 +4,7 @@ * Copyright (C) 2012 Johannes Goetzfried * * - * Copyright © 2012 Jussi Kivilinna + * Copyright © 2012-2013 Jussi Kivilinna * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,6 +33,8 @@ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.Lxts_gf128mul_and_shl1_mask: + .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 .text @@ -408,3 +410,47 @@ ENTRY(twofish_ctr_8way) ret; ENDPROC(twofish_ctr_8way) + +ENTRY(twofish_xts_enc_8way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) + */ + + movq %rsi, %r11; + + /* regs <= src, dst <= IVs, regs <= regs xor IVs */ + load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, + RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask); + + call __twofish_enc_blk8; + + /* dst <= regs xor IVs(in dst) */ + store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); + + ret; +ENDPROC(twofish_xts_enc_8way) + +ENTRY(twofish_xts_dec_8way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) + */ + + movq %rsi, %r11; + + /* regs <= src, dst <= IVs, regs <= regs xor IVs */ + load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2, + RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask); + + call __twofish_dec_blk8; + + /* dst <= regs xor IVs(in dst) */ + store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + ret; +ENDPROC(twofish_xts_dec_8way) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 94ac91d26e47..a62ba541884e 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -4,6 +4,8 @@ * Copyright (C) 2012 Johannes Goetzfried * * + * Copyright © 2013 Jussi Kivilinna + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -56,12 +58,29 @@ asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, le128 *iv); +asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, le128 *iv); + static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } +static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +{ + glue_xts_crypt_128bit_one(ctx, dst, src, iv, + GLUE_FUNC_CAST(twofish_enc_blk)); +} + +static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +{ + glue_xts_crypt_128bit_one(ctx, dst, src, iv, + GLUE_FUNC_CAST(twofish_dec_blk)); +} + static const struct common_glue_ctx twofish_enc = { .num_funcs = 3, @@ -95,6 +114,19 @@ static const struct common_glue_ctx twofish_ctr = { } } }; +static const struct common_glue_ctx twofish_enc_xts = { + .num_funcs = 2, + .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = TWOFISH_PARALLEL_BLOCKS, + .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } + }, { + .num_blocks = 1, + .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } + } } +}; + static const struct common_glue_ctx twofish_dec = { .num_funcs = 3, .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, @@ -127,6 +159,19 @@ static const struct common_glue_ctx twofish_dec_cbc = { } } }; +static const struct common_glue_ctx twofish_dec_xts = { + .num_funcs = 2, + .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = TWOFISH_PARALLEL_BLOCKS, + .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } + }, { + .num_blocks = 1, + .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } + } } +}; + static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -275,54 +320,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } static struct crypto_alg twofish_algs[10] = { {