]> git.karo-electronics.de Git - linux-beck.git/commitdiff
crypto: twofish-x86_64-3way - blacklist pentium4 and atom
author Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Tue, 20 Dec 2011 10:20:16 +0000 (12:20 +0200)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 13 Jan 2012 05:38:39 +0000 (16:38 +1100)
Performance of twofish-x86_64-3way on Intel Pentium 4 and Atom is lower than
that of the twofish-x86_64 module, so blacklist these CPUs.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/twofish_glue_3way.c

index 7fee8c152f93c73d430b138a464dcb83f8cfaf0e..0afd134d8c9c9345d85c03846c50c22994afc57a 100644 (file)
@@ -25,6 +25,7 @@
  *
  */
 
+#include <asm/processor.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -637,10 +638,56 @@ static struct crypto_alg blk_xts_alg = {
        },
 };
 
+static bool is_blacklisted_cpu(void)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return false;
+
+       if (boot_cpu_data.x86 == 0x06 &&
+               (boot_cpu_data.x86_model == 0x1c ||
+                boot_cpu_data.x86_model == 0x26 ||
+                boot_cpu_data.x86_model == 0x36)) {
+               /*
+                * On Atom, twofish-3way is slower than the original assembler
+                * implementation. Twofish-3way trades off some performance by
+                * storing blocks in 64-bit registers so that three blocks can
+                * be processed in parallel. On out-of-order CPUs the parallel
+                * operation gains back more performance than was traded off.
+                * However, Atom does not benefit from this parallelism and
+                * should be blacklisted.
+                */
+               return true;
+       }
+
+       if (boot_cpu_data.x86 == 0x0f) {
+               /*
+                * On Pentium 4, twofish-3way is slower than the original
+                * assembler implementation because of the many 64-bit rotates
+                * and left-shifts (which are really slow on P4) needed to store
+                * and handle a 128-bit block in two 64-bit registers.
+                */
+               return true;
+       }
+
+       return false;
+}
+
+static int force; /* non-zero: init() skips the CPU blacklist check */
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
+
 int __init init(void)
 {
        int err;
 
+       if (!force && is_blacklisted_cpu()) {
+               printk(KERN_INFO
+                       "twofish-x86_64-3way: performance on this CPU "
+                       "would be suboptimal: disabling "
+                       "twofish-x86_64-3way.\n");
+               return -ENODEV;
+       }
+
        err = crypto_register_alg(&blk_ecb_alg);
        if (err)
                goto ecb_err;