Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 21 May 2010 21:46:51 +0000 (14:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 21 May 2010 21:46:51 +0000 (14:46 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (46 commits)
  random: simplify fips mode
  crypto: authenc - Fix cryptlen calculation
  crypto: talitos - add support for sha224
  crypto: talitos - add hash algorithms
  crypto: talitos - second prepare step for adding ahash algorithms
  crypto: talitos - prepare for adding ahash algorithms
  crypto: n2 - Add Niagara2 crypto driver
  crypto: skcipher - Add ablkcipher_walk interfaces
  crypto: testmgr - Add testing for async hashing and update/final
  crypto: tcrypt - Add speed tests for async hashing
  crypto: scatterwalk - Fix scatterwalk_done() test
  crypto: hifn_795x - Rename ablkcipher_walk to hifn_cipher_walk
  padata: Use get_online_cpus/put_online_cpus in padata_free
  padata: Add some code comments
  padata: Flush the padata queues actively
  padata: Use a timer to handle remaining objects in the reorder queues
  crypto: shash - Remove usage of CRYPTO_MINALIGN
  crypto: mv_cesa - Use resource_size
  crypto: omap - OMAP macros corrected
  padata: Use get_online_cpus/put_online_cpus
  ...

Fix up conflicts in arch/arm/mach-omap2/devices.c

36 files changed:
arch/arm/mach-omap2/clock2420_data.c
arch/arm/mach-omap2/clock2430_data.c
arch/arm/mach-omap2/clock3xxx_data.c
arch/arm/mach-omap2/devices.c
arch/arm/plat-omap/include/plat/omap34xx.h
arch/x86/crypto/aesni-intel_asm.S
arch/x86/crypto/aesni-intel_glue.c
arch/x86/include/asm/inst.h
crypto/ablkcipher.c
crypto/algapi.c
crypto/authenc.c
crypto/internal.h
crypto/pcrypt.c
crypto/scatterwalk.c
crypto/shash.c
crypto/tcrypt.c
crypto/tcrypt.h
crypto/testmgr.c
crypto/testmgr.h
crypto/vmac.c
drivers/char/random.c
drivers/crypto/Kconfig
drivers/crypto/Makefile
drivers/crypto/geode-aes.c
drivers/crypto/hifn_795x.c
drivers/crypto/mv_cesa.c
drivers/crypto/mv_cesa.h
drivers/crypto/n2_asm.S [new file with mode: 0644]
drivers/crypto/n2_core.c [new file with mode: 0644]
drivers/crypto/n2_core.h [new file with mode: 0644]
drivers/crypto/omap-sham.c [new file with mode: 0644]
drivers/crypto/talitos.c
drivers/crypto/talitos.h
include/crypto/algapi.h
include/linux/padata.h
kernel/padata.c

arch/arm/mach-omap2/clock2420_data.c
index 23bc981..37d65d6 100644
@@ -1836,7 +1836,7 @@ static struct omap_clk omap2420_clks[] = {
        CLK(NULL,       "vlynq_ick",    &vlynq_ick,     CK_242X),
        CLK(NULL,       "vlynq_fck",    &vlynq_fck,     CK_242X),
        CLK(NULL,       "des_ick",      &des_ick,       CK_242X),
-       CLK(NULL,       "sha_ick",      &sha_ick,       CK_242X),
+       CLK("omap-sham",        "ick",  &sha_ick,       CK_242X),
        CLK("omap_rng", "ick",          &rng_ick,       CK_242X),
        CLK(NULL,       "aes_ick",      &aes_ick,       CK_242X),
        CLK(NULL,       "pka_ick",      &pka_ick,       CK_242X),

arch/arm/mach-omap2/clock2430_data.c
index 2df50d9..b33118f 100644
@@ -1924,7 +1924,7 @@ static struct omap_clk omap2430_clks[] = {
        CLK(NULL,       "sdma_ick",     &sdma_ick,      CK_243X),
        CLK(NULL,       "sdrc_ick",     &sdrc_ick,      CK_243X),
        CLK(NULL,       "des_ick",      &des_ick,       CK_243X),
-       CLK(NULL,       "sha_ick",      &sha_ick,       CK_243X),
+       CLK("omap-sham",        "ick",  &sha_ick,       CK_243X),
        CLK("omap_rng", "ick",          &rng_ick,       CK_243X),
        CLK(NULL,       "aes_ick",      &aes_ick,       CK_243X),
        CLK(NULL,       "pka_ick",      &pka_ick,       CK_243X),

arch/arm/mach-omap2/clock3xxx_data.c
index 833be48..41b155a 100644
@@ -3284,7 +3284,7 @@ static struct omap_clk omap3xxx_clks[] = {
        CLK("mmci-omap-hs.2",   "ick",  &mmchs3_ick,    CK_3430ES2 | CK_AM35XX),
        CLK(NULL,       "icr_ick",      &icr_ick,       CK_343X),
        CLK(NULL,       "aes2_ick",     &aes2_ick,      CK_343X),
-       CLK(NULL,       "sha12_ick",    &sha12_ick,     CK_343X),
+       CLK("omap-sham",        "ick",  &sha12_ick,     CK_343X),
        CLK(NULL,       "des2_ick",     &des2_ick,      CK_343X),
        CLK("mmci-omap-hs.1",   "ick",  &mmchs2_ick,    CK_3XXX),
        CLK("mmci-omap-hs.0",   "ick",  &mmchs1_ick,    CK_3XXX),

arch/arm/mach-omap2/devices.c
index 705a7a3..03e6c9e 100644
@@ -28,6 +28,7 @@
 #include <plat/mux.h>
 #include <mach/gpio.h>
 #include <plat/mmc.h>
+#include <plat/dma.h>
 
 #include "mux.h"
 
@@ -486,8 +487,10 @@ static void omap_init_pmu(void)
 }
 
 
-#ifdef CONFIG_OMAP_SHA1_MD5
-static struct resource sha1_md5_resources[] = {
+#if defined(CONFIG_CRYPTO_DEV_OMAP_SHAM) || defined(CONFIG_CRYPTO_DEV_OMAP_SHAM_MODULE)
+
+#ifdef CONFIG_ARCH_OMAP2
+static struct resource omap2_sham_resources[] = {
        {
                .start  = OMAP24XX_SEC_SHA1MD5_BASE,
                .end    = OMAP24XX_SEC_SHA1MD5_BASE + 0x64,
@@ -498,20 +501,55 @@ static struct resource sha1_md5_resources[] = {
                .flags  = IORESOURCE_IRQ,
        }
 };
+static int omap2_sham_resources_sz = ARRAY_SIZE(omap2_sham_resources);
+#else
+#define omap2_sham_resources           NULL
+#define omap2_sham_resources_sz                0
+#endif
 
-static struct platform_device sha1_md5_device = {
-       .name           = "OMAP SHA1/MD5",
+#ifdef CONFIG_ARCH_OMAP3
+static struct resource omap3_sham_resources[] = {
+       {
+               .start  = OMAP34XX_SEC_SHA1MD5_BASE,
+               .end    = OMAP34XX_SEC_SHA1MD5_BASE + 0x64,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               .start  = INT_34XX_SHA1MD52_IRQ,
+               .flags  = IORESOURCE_IRQ,
+       },
+       {
+               .start  = OMAP34XX_DMA_SHA1MD5_RX,
+               .flags  = IORESOURCE_DMA,
+       }
+};
+static int omap3_sham_resources_sz = ARRAY_SIZE(omap3_sham_resources);
+#else
+#define omap3_sham_resources           NULL
+#define omap3_sham_resources_sz                0
+#endif
+
+static struct platform_device sham_device = {
+       .name           = "omap-sham",
        .id             = -1,
-       .num_resources  = ARRAY_SIZE(sha1_md5_resources),
-       .resource       = sha1_md5_resources,
 };
 
-static void omap_init_sha1_md5(void)
+static void omap_init_sham(void)
 {
-       platform_device_register(&sha1_md5_device);
+       if (cpu_is_omap24xx()) {
+               sham_device.resource = omap2_sham_resources;
+               sham_device.num_resources = omap2_sham_resources_sz;
+       } else if (cpu_is_omap34xx()) {
+               sham_device.resource = omap3_sham_resources;
+               sham_device.num_resources = omap3_sham_resources_sz;
+       } else {
+               pr_err("%s: platform not supported\n", __func__);
+               return;
+       }
+       platform_device_register(&sham_device);
 }
 #else
-static inline void omap_init_sha1_md5(void) { }
+static inline void omap_init_sham(void) { }
 #endif
 
 /*-------------------------------------------------------------------------*/
@@ -869,7 +907,7 @@ static int __init omap2_init_devices(void)
        omap_init_pmu();
        omap_hdq_init();
        omap_init_sti();
-       omap_init_sha1_md5();
+       omap_init_sham();
        omap_init_vout();
 
        return 0;
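
The hunks above only register the "omap-sham" platform device with per-SoC
MEM/IRQ (and, on OMAP3, DMA) resources; the driver that consumes them is added
elsewhere in this merge as drivers/crypto/omap-sham.c. As a minimal,
hypothetical sketch of the consumer side (function name, includes and error
handling are illustrative here, not taken from the real driver), a probe
routine would pick those resources up roughly like this:

    #include <linux/platform_device.h>
    #include <linux/ioport.h>

    /* Illustrative sketch only: fetching the resources registered by
     * omap_init_sham() above. */
    static int omap_sham_probe_sketch(struct platform_device *pdev)
    {
            struct resource *mem, *dma;
            int irq;

            mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            irq = platform_get_irq(pdev, 0);
            if (!mem || irq < 0)
                    return -ENODEV;

            /* The third resource (the DMA request line) exists only on OMAP3. */
            dma = platform_get_resource(pdev, IORESOURCE_DMA, 0);
            if (dma)
                    dev_info(&pdev->dev, "DMA request line %llu\n",
                             (unsigned long long)dma->start);

            /* ioremap(mem->start, resource_size(mem)), request_irq(irq, ...)
             * and DMA channel setup would follow here. */
            return 0;
    }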

arch/arm/plat-omap/include/plat/omap34xx.h
index 2845fdc..98fc8b4 100644
 
 #define OMAP34XX_MAILBOX_BASE          (L4_34XX_BASE + 0x94000)
 
+/* Security */
+#define OMAP34XX_SEC_BASE      (L4_34XX_BASE + 0xA0000)
+#define OMAP34XX_SEC_SHA1MD5_BASE      (OMAP34XX_SEC_BASE + 0x23000)
+#define OMAP34XX_SEC_AES_BASE  (OMAP34XX_SEC_BASE + 0x25000)
+
 #endif /* __ASM_ARCH_OMAP3_H */
 

arch/x86/crypto/aesni-intel_asm.S
index 20bb0e1..ff16756 100644
@@ -32,6 +32,9 @@
 #define IN     IN1
 #define KEY    %xmm2
 #define IV     %xmm3
+#define BSWAP_MASK %xmm10
+#define CTR    %xmm11
+#define INC    %xmm12
 
 #define KEYP   %rdi
 #define OUTP   %rsi
@@ -42,6 +45,7 @@
 #define T1     %r10
 #define TKEYP  T1
 #define T2     %r11
+#define TCTR_LOW T2
 
 _key_expansion_128:
 _key_expansion_256a:
@@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec)
        movups IV, (IVP)
 .Lcbc_dec_just_ret:
        ret
+
+.align 16
+.Lbswap_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * _aesni_inc_init:    internal ABI
+ *     setup registers used by _aesni_inc
+ * input:
+ *     IV
+ * output:
+ *     CTR:    == IV, in little endian
+ *     TCTR_LOW: == lower qword of CTR
+ *     INC:    == 1, in little endian
+ *     BSWAP_MASK == endian swapping mask
+ */
+_aesni_inc_init:
+       movaps .Lbswap_mask, BSWAP_MASK
+       movaps IV, CTR
+       PSHUFB_XMM BSWAP_MASK CTR
+       mov $1, TCTR_LOW
+       MOVQ_R64_XMM TCTR_LOW INC
+       MOVQ_R64_XMM CTR TCTR_LOW
+       ret
+
+/*
+ * _aesni_inc:         internal ABI
+ *     Increase IV by 1, IV is in big endian
+ * input:
+ *     IV
+ *     CTR:    == IV, in little endian
+ *     TCTR_LOW: == lower qword of CTR
+ *     INC:    == 1, in little endian
+ *     BSWAP_MASK == endian swapping mask
+ * output:
+ *     IV:     Increase by 1
+ * changed:
+ *     CTR:    == output IV, in little endian
+ *     TCTR_LOW: == lower qword of CTR
+ */
+_aesni_inc:
+       paddq INC, CTR
+       add $1, TCTR_LOW
+       jnc .Linc_low
+       pslldq $8, INC
+       paddq INC, CTR
+       psrldq $8, INC
+.Linc_low:
+       movaps CTR, IV
+       PSHUFB_XMM BSWAP_MASK IV
+       ret
+
+/*
+ * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ *                   size_t len, u8 *iv)
+ */
+ENTRY(aesni_ctr_enc)
+       cmp $16, LEN
+       jb .Lctr_enc_just_ret
+       mov 480(KEYP), KLEN
+       movups (IVP), IV
+       call _aesni_inc_init
+       cmp $64, LEN
+       jb .Lctr_enc_loop1
+.align 4
+.Lctr_enc_loop4:
+       movaps IV, STATE1
+       call _aesni_inc
+       movups (INP), IN1
+       movaps IV, STATE2
+       call _aesni_inc
+       movups 0x10(INP), IN2
+       movaps IV, STATE3
+       call _aesni_inc
+       movups 0x20(INP), IN3
+       movaps IV, STATE4
+       call _aesni_inc
+       movups 0x30(INP), IN4
+       call _aesni_enc4
+       pxor IN1, STATE1
+       movups STATE1, (OUTP)
+       pxor IN2, STATE2
+       movups STATE2, 0x10(OUTP)
+       pxor IN3, STATE3
+       movups STATE3, 0x20(OUTP)
+       pxor IN4, STATE4
+       movups STATE4, 0x30(OUTP)
+       sub $64, LEN
+       add $64, INP
+       add $64, OUTP
+       cmp $64, LEN
+       jge .Lctr_enc_loop4
+       cmp $16, LEN
+       jb .Lctr_enc_ret
+.align 4
+.Lctr_enc_loop1:
+       movaps IV, STATE
+       call _aesni_inc
+       movups (INP), IN
+       call _aesni_enc1
+       pxor IN, STATE
+       movups STATE, (OUTP)
+       sub $16, LEN
+       add $16, INP
+       add $16, OUTP
+       cmp $16, LEN
+       jge .Lctr_enc_loop1
+.Lctr_enc_ret:
+       movups IV, (IVP)
+.Lctr_enc_just_ret:
+       ret
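
The _aesni_inc helper above keeps the counter byte-swapped in CTR so that a
64-bit paddq can do the increment, and only touches the high quadword when the
low one wraps. An illustrative C equivalent of that 128-bit big-endian
increment (sketch only, not part of the patch; the function name is made up):

    #include <linux/types.h>
    #include <asm/byteorder.h>

    /* Treat the 16-byte IV as a big-endian 128-bit counter and add 1. */
    static void ctr_iv_inc_sketch(u8 iv[16])
    {
            __be64 *q = (__be64 *)iv;
            u64 lo = be64_to_cpu(q[1]);     /* last 8 bytes = low quadword */

            lo++;
            q[1] = cpu_to_be64(lo);
            if (lo == 0)                    /* wrapped: carry into high quadword */
                    q[0] = cpu_to_be64(be64_to_cpu(q[0]) + 1);
    }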

arch/x86/crypto/aesni-intel_glue.c
index 49c552c..2cb3dcc 100644
@@ -18,6 +18,7 @@
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
 #include <crypto/cryptd.h>
+#include <crypto/ctr.h>
 #include <asm/i387.h>
 #include <asm/aes.h>
 
@@ -58,6 +59,8 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
                              const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
                              const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
+                             const u8 *in, unsigned int len, u8 *iv);
 
 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
 {
@@ -321,6 +324,72 @@ static struct crypto_alg blk_cbc_alg = {
        },
 };
 
+static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
+                           struct blkcipher_walk *walk)
+{
+       u8 *ctrblk = walk->iv;
+       u8 keystream[AES_BLOCK_SIZE];
+       u8 *src = walk->src.virt.addr;
+       u8 *dst = walk->dst.virt.addr;
+       unsigned int nbytes = walk->nbytes;
+
+       aesni_enc(ctx, keystream, ctrblk);
+       crypto_xor(keystream, src, nbytes);
+       memcpy(dst, keystream, nbytes);
+       crypto_inc(ctrblk, AES_BLOCK_SIZE);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc,
+                    struct scatterlist *dst, struct scatterlist *src,
+                    unsigned int nbytes)
+{
+       struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+       struct blkcipher_walk walk;
+       int err;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       kernel_fpu_begin();
+       while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+               aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+                             nbytes & AES_BLOCK_MASK, walk.iv);
+               nbytes &= AES_BLOCK_SIZE - 1;
+               err = blkcipher_walk_done(desc, &walk, nbytes);
+       }
+       if (walk.nbytes) {
+               ctr_crypt_final(ctx, &walk);
+               err = blkcipher_walk_done(desc, &walk, 0);
+       }
+       kernel_fpu_end();
+
+       return err;
+}
+
+static struct crypto_alg blk_ctr_alg = {
+       .cra_name               = "__ctr-aes-aesni",
+       .cra_driver_name        = "__driver-ctr-aes-aesni",
+       .cra_priority           = 0,
+       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          = 1,
+       .cra_ctxsize            = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+       .cra_alignmask          = 0,
+       .cra_type               = &crypto_blkcipher_type,
+       .cra_module             = THIS_MODULE,
+       .cra_list               = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+       .cra_u = {
+               .blkcipher = {
+                       .min_keysize    = AES_MIN_KEY_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE,
+                       .ivsize         = AES_BLOCK_SIZE,
+                       .setkey         = aes_set_key,
+                       .encrypt        = ctr_crypt,
+                       .decrypt        = ctr_crypt,
+               },
+       },
+};
+
 static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
                        unsigned int key_len)
 {
@@ -467,13 +536,11 @@ static struct crypto_alg ablk_cbc_alg = {
        },
 };
 
-#ifdef HAS_CTR
 static int ablk_ctr_init(struct crypto_tfm *tfm)
 {
        struct cryptd_ablkcipher *cryptd_tfm;
 
-       cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))",
-                                            0, 0);
+       cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0);
        if (IS_ERR(cryptd_tfm))
                return PTR_ERR(cryptd_tfm);
        ablk_init_common(tfm, cryptd_tfm);
@@ -500,11 +567,50 @@ static struct crypto_alg ablk_ctr_alg = {
                        .ivsize         = AES_BLOCK_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = ablk_encrypt,
-                       .decrypt        = ablk_decrypt,
+                       .decrypt        = ablk_encrypt,
                        .geniv          = "chainiv",
                },
        },
 };
+
+#ifdef HAS_CTR
+static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
+{
+       struct cryptd_ablkcipher *cryptd_tfm;
+
+       cryptd_tfm = cryptd_alloc_ablkcipher(
+               "rfc3686(__driver-ctr-aes-aesni)", 0, 0);
+       if (IS_ERR(cryptd_tfm))
+               return PTR_ERR(cryptd_tfm);
+       ablk_init_common(tfm, cryptd_tfm);
+       return 0;
+}
+
+static struct crypto_alg ablk_rfc3686_ctr_alg = {
+       .cra_name               = "rfc3686(ctr(aes))",
+       .cra_driver_name        = "rfc3686-ctr-aes-aesni",
+       .cra_priority           = 400,
+       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+       .cra_blocksize          = 1,
+       .cra_ctxsize            = sizeof(struct async_aes_ctx),
+       .cra_alignmask          = 0,
+       .cra_type               = &crypto_ablkcipher_type,
+       .cra_module             = THIS_MODULE,
+       .cra_list               = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list),
+       .cra_init               = ablk_rfc3686_ctr_init,
+       .cra_exit               = ablk_exit,
+       .cra_u = {
+               .ablkcipher = {
+                       .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
+                       .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
+                       .ivsize      = CTR_RFC3686_IV_SIZE,
+                       .setkey      = ablk_set_key,
+                       .encrypt     = ablk_encrypt,
+                       .decrypt     = ablk_decrypt,
+                       .geniv       = "seqiv",
+               },
+       },
+};
 #endif
 
 #ifdef HAS_LRW
@@ -640,13 +746,17 @@ static int __init aesni_init(void)
                goto blk_ecb_err;
        if ((err = crypto_register_alg(&blk_cbc_alg)))
                goto blk_cbc_err;
+       if ((err = crypto_register_alg(&blk_ctr_alg)))
+               goto blk_ctr_err;
        if ((err = crypto_register_alg(&ablk_ecb_alg)))
                goto ablk_ecb_err;
        if ((err = crypto_register_alg(&ablk_cbc_alg)))
                goto ablk_cbc_err;
-#ifdef HAS_CTR
        if ((err = crypto_register_alg(&ablk_ctr_alg)))
                goto ablk_ctr_err;
+#ifdef HAS_CTR
+       if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
+               goto ablk_rfc3686_ctr_err;
 #endif
 #ifdef HAS_LRW
        if ((err = crypto_register_alg(&ablk_lrw_alg)))
@@ -675,13 +785,17 @@ ablk_pcbc_err:
 ablk_lrw_err:
 #endif
 #ifdef HAS_CTR
+       crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
+ablk_rfc3686_ctr_err:
+#endif
        crypto_unregister_alg(&ablk_ctr_alg);
 ablk_ctr_err:
-#endif
        crypto_unregister_alg(&ablk_cbc_alg);
 ablk_cbc_err:
        crypto_unregister_alg(&ablk_ecb_alg);
 ablk_ecb_err:
+       crypto_unregister_alg(&blk_ctr_alg);
+blk_ctr_err:
        crypto_unregister_alg(&blk_cbc_alg);
 blk_cbc_err:
        crypto_unregister_alg(&blk_ecb_alg);
@@ -705,10 +819,12 @@ static void __exit aesni_exit(void)
        crypto_unregister_alg(&ablk_lrw_alg);
 #endif
 #ifdef HAS_CTR
-       crypto_unregister_alg(&ablk_ctr_alg);
+       crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
 #endif
+       crypto_unregister_alg(&ablk_ctr_alg);
        crypto_unregister_alg(&ablk_cbc_alg);
        crypto_unregister_alg(&ablk_ecb_alg);
+       crypto_unregister_alg(&blk_ctr_alg);
        crypto_unregister_alg(&blk_cbc_alg);
        crypto_unregister_alg(&blk_ecb_alg);
        crypto_unregister_alg(&__aesni_alg);
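
With blk_ctr_alg providing the synchronous "__ctr-aes-aesni" cipher and
ablk_ctr_alg now wrapping it through cryptd unconditionally, a request for
"ctr(aes)" resolves to the AES-NI code on capable hardware. A minimal caller
sketch (hypothetical helper; it assumes a synchronous completion and omits the
-EINPROGRESS/-EBUSY handling a real user needs):

    #include <linux/crypto.h>
    #include <linux/scatterlist.h>
    #include <linux/err.h>

    /* Illustrative one-shot CTR encryption through the generic API. */
    static int ctr_aes_oneshot_sketch(const u8 *key, unsigned int keylen, u8 *iv,
                                      struct scatterlist *src,
                                      struct scatterlist *dst, unsigned int len)
    {
            struct crypto_ablkcipher *tfm;
            struct ablkcipher_request *req;
            int err;

            tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    crypto_free_ablkcipher(tfm);
                    return -ENOMEM;
            }

            err = crypto_ablkcipher_setkey(tfm, key, keylen);
            if (!err) {
                    ablkcipher_request_set_crypt(req, src, dst, len, iv);
                    err = crypto_ablkcipher_encrypt(req);
            }

            ablkcipher_request_free(req);
            crypto_free_ablkcipher(tfm);
            return err;
    }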

arch/x86/include/asm/inst.h
index 14cf526..280bf7f 100644
@@ -7,7 +7,66 @@
 
 #ifdef __ASSEMBLY__
 
+#define REG_NUM_INVALID                100
+
+#define REG_TYPE_R64           0
+#define REG_TYPE_XMM           1
+#define REG_TYPE_INVALID       100
+
+       .macro R64_NUM opd r64
+       \opd = REG_NUM_INVALID
+       .ifc \r64,%rax
+       \opd = 0
+       .endif
+       .ifc \r64,%rcx
+       \opd = 1
+       .endif
+       .ifc \r64,%rdx
+       \opd = 2
+       .endif
+       .ifc \r64,%rbx
+       \opd = 3
+       .endif
+       .ifc \r64,%rsp
+       \opd = 4
+       .endif
+       .ifc \r64,%rbp
+       \opd = 5
+       .endif
+       .ifc \r64,%rsi
+       \opd = 6
+       .endif
+       .ifc \r64,%rdi
+       \opd = 7
+       .endif
+       .ifc \r64,%r8
+       \opd = 8
+       .endif
+       .ifc \r64,%r9
+       \opd = 9
+       .endif
+       .ifc \r64,%r10
+       \opd = 10
+       .endif
+       .ifc \r64,%r11
+       \opd = 11
+       .endif
+       .ifc \r64,%r12
+       \opd = 12
+       .endif
+       .ifc \r64,%r13
+       \opd = 13
+       .endif
+       .ifc \r64,%r14
+       \opd = 14
+       .endif
+       .ifc \r64,%r15
+       \opd = 15
+       .endif
+       .endm
+
        .macro XMM_NUM opd xmm
+       \opd = REG_NUM_INVALID
        .ifc \xmm,%xmm0
        \opd = 0
        .endif
        .endif
        .endm
 
+       .macro REG_TYPE type reg
+       R64_NUM reg_type_r64 \reg
+       XMM_NUM reg_type_xmm \reg
+       .if reg_type_r64 <> REG_NUM_INVALID
+       \type = REG_TYPE_R64
+       .elseif reg_type_xmm <> REG_NUM_INVALID
+       \type = REG_TYPE_XMM
+       .else
+       \type = REG_TYPE_INVALID
+       .endif
+       .endm
+
        .macro PFX_OPD_SIZE
        .byte 0x66
        .endm
 
-       .macro PFX_REX opd1 opd2
-       .if (\opd1 | \opd2) & 8
-       .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1)
+       .macro PFX_REX opd1 opd2 W=0
+       .if ((\opd1 | \opd2) & 8) || \W
+       .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
        .endif
        .endm
 
        .byte 0x0f, 0x38, 0xdf
        MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
        .endm
+
+       .macro MOVQ_R64_XMM opd1 opd2
+       REG_TYPE movq_r64_xmm_opd1_type \opd1
+       .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
+       XMM_NUM movq_r64_xmm_opd1 \opd1
+       R64_NUM movq_r64_xmm_opd2 \opd2
+       .else
+       R64_NUM movq_r64_xmm_opd1 \opd1
+       XMM_NUM movq_r64_xmm_opd2 \opd2
+       .endif
+       PFX_OPD_SIZE
+       PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1
+       .if movq_r64_xmm_opd1_type == REG_TYPE_XMM
+       .byte 0x0f, 0x7e
+       .else
+       .byte 0x0f, 0x6e
+       .endif
+       MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
+       .endm
 #endif
 
 #endif

crypto/ablkcipher.c
index fe980da..98a6610 100644
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 
+#include <crypto/scatterwalk.h>
+
 #include "internal.h"
 
 static const char *skcipher_default_geniv __read_mostly;
 
+struct ablkcipher_buffer {
+       struct list_head        entry;
+       struct scatter_walk     dst;
+       unsigned int            len;
+       void                    *data;
+};
+
+enum {
+       ABLKCIPHER_WALK_SLOW = 1 << 0,
+};
+
+static inline void ablkcipher_buffer_write(struct ablkcipher_buffer *p)
+{
+       scatterwalk_copychunks(p->data, &p->dst, p->len, 1);
+}
+
+void __ablkcipher_walk_complete(struct ablkcipher_walk *walk)
+{
+       struct ablkcipher_buffer *p, *tmp;
+
+       list_for_each_entry_safe(p, tmp, &walk->buffers, entry) {
+               ablkcipher_buffer_write(p);
+               list_del(&p->entry);
+               kfree(p);
+       }
+}
+EXPORT_SYMBOL_GPL(__ablkcipher_walk_complete);
+
+static inline void ablkcipher_queue_write(struct ablkcipher_walk *walk,
+                                         struct ablkcipher_buffer *p)
+{
+       p->dst = walk->out;
+       list_add_tail(&p->entry, &walk->buffers);
+}
+
+/* Get a spot of the specified length that does not straddle a page.
+ * The caller needs to ensure that there is enough space for this operation.
+ */
+static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len)
+{
+       u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK);
+       return max(start, end_page);
+}
+
+static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk,
+                                               unsigned int bsize)
+{
+       unsigned int n = bsize;
+
+       for (;;) {
+               unsigned int len_this_page = scatterwalk_pagelen(&walk->out);
+
+               if (len_this_page > n)
+                       len_this_page = n;
+               scatterwalk_advance(&walk->out, n);
+               if (n == len_this_page)
+                       break;
+               n -= len_this_page;
+               scatterwalk_start(&walk->out, scatterwalk_sg_next(walk->out.sg));
+       }
+
+       return bsize;
+}
+
+static inline unsigned int ablkcipher_done_fast(struct ablkcipher_walk *walk,
+                                               unsigned int n)
+{
+       scatterwalk_advance(&walk->in, n);
+       scatterwalk_advance(&walk->out, n);
+
+       return n;
+}
+
+static int ablkcipher_walk_next(struct ablkcipher_request *req,
+                               struct ablkcipher_walk *walk);
+
+int ablkcipher_walk_done(struct ablkcipher_request *req,
+                        struct ablkcipher_walk *walk, int err)
+{
+       struct crypto_tfm *tfm = req->base.tfm;
+       unsigned int nbytes = 0;
+
+       if (likely(err >= 0)) {
+               unsigned int n = walk->nbytes - err;
+
+               if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW)))
+                       n = ablkcipher_done_fast(walk, n);
+               else if (WARN_ON(err)) {
+                       err = -EINVAL;
+                       goto err;
+               } else
+                       n = ablkcipher_done_slow(walk, n);
+
+               nbytes = walk->total - n;
+               err = 0;
+       }
+
+       scatterwalk_done(&walk->in, 0, nbytes);
+       scatterwalk_done(&walk->out, 1, nbytes);
+
+err:
+       walk->total = nbytes;
+       walk->nbytes = nbytes;
+
+       if (nbytes) {
+               crypto_yield(req->base.flags);
+               return ablkcipher_walk_next(req, walk);
+       }
+
+       if (walk->iv != req->info)
+               memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize);
+       if (walk->iv_buffer)
+               kfree(walk->iv_buffer);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(ablkcipher_walk_done);
+
+static inline int ablkcipher_next_slow(struct ablkcipher_request *req,
+                                      struct ablkcipher_walk *walk,
+                                      unsigned int bsize,
+                                      unsigned int alignmask,
+                                      void **src_p, void **dst_p)
+{
+       unsigned aligned_bsize = ALIGN(bsize, alignmask + 1);
+       struct ablkcipher_buffer *p;
+       void *src, *dst, *base;
+       unsigned int n;
+
+       n = ALIGN(sizeof(struct ablkcipher_buffer), alignmask + 1);
+       n += (aligned_bsize * 3 - (alignmask + 1) +
+             (alignmask & ~(crypto_tfm_ctx_alignment() - 1)));
+
+       p = kmalloc(n, GFP_ATOMIC);
+       if (!p)
+               ablkcipher_walk_done(req, walk, -ENOMEM);
+
+       base = p + 1;
+
+       dst = (u8 *)ALIGN((unsigned long)base, alignmask + 1);
+       src = dst = ablkcipher_get_spot(dst, bsize);
+
+       p->len = bsize;
+       p->data = dst;
+
+       scatterwalk_copychunks(src, &walk->in, bsize, 0);
+
+       ablkcipher_queue_write(walk, p);
+
+       walk->nbytes = bsize;
+       walk->flags |= ABLKCIPHER_WALK_SLOW;
+
+       *src_p = src;
+       *dst_p = dst;
+
+       return 0;
+}
+
+static inline int ablkcipher_copy_iv(struct ablkcipher_walk *walk,
+                                    struct crypto_tfm *tfm,
+                                    unsigned int alignmask)
+{
+       unsigned bs = walk->blocksize;
+       unsigned int ivsize = tfm->crt_ablkcipher.ivsize;
+       unsigned aligned_bs = ALIGN(bs, alignmask + 1);
+       unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) -
+                           (alignmask + 1);
+       u8 *iv;
+
+       size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);
+       walk->iv_buffer = kmalloc(size, GFP_ATOMIC);
+       if (!walk->iv_buffer)
+               return -ENOMEM;
+
+       iv = (u8 *)ALIGN((unsigned long)walk->iv_buffer, alignmask + 1);
+       iv = ablkcipher_get_spot(iv, bs) + aligned_bs;
+       iv = ablkcipher_get_spot(iv, bs) + aligned_bs;
+       iv = ablkcipher_get_spot(iv, ivsize);
+
+       walk->iv = memcpy(iv, walk->iv, ivsize);
+       return 0;
+}
+
+static inline int ablkcipher_next_fast(struct ablkcipher_request *req,
+                                      struct ablkcipher_walk *walk)
+{
+       walk->src.page = scatterwalk_page(&walk->in);
+       walk->src.offset = offset_in_page(walk->in.offset);
+       walk->dst.page = scatterwalk_page(&walk->out);
+       walk->dst.offset = offset_in_page(walk->out.offset);
+
+       return 0;
+}
+
+static int ablkcipher_walk_next(struct ablkcipher_request *req,
+                               struct ablkcipher_walk *walk)
+{
+       struct crypto_tfm *tfm = req->base.tfm;
+       unsigned int alignmask, bsize, n;
+       void *src, *dst;
+       int err;
+
+       alignmask = crypto_tfm_alg_alignmask(tfm);
+       n = walk->total;
+       if (unlikely(n < crypto_tfm_alg_blocksize(tfm))) {
+               req->base.flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
+               return ablkcipher_walk_done(req, walk, -EINVAL);
+       }
+
+       walk->flags &= ~ABLKCIPHER_WALK_SLOW;
+       src = dst = NULL;
+
+       bsize = min(walk->blocksize, n);
+       n = scatterwalk_clamp(&walk->in, n);
+       n = scatterwalk_clamp(&walk->out, n);
+
+       if (n < bsize ||
+           !scatterwalk_aligned(&walk->in, alignmask) ||
+           !scatterwalk_aligned(&walk->out, alignmask)) {
+               err = ablkcipher_next_slow(req, walk, bsize, alignmask,
+                                          &src, &dst);
+               goto set_phys_lowmem;
+       }
+
+       walk->nbytes = n;
+
+       return ablkcipher_next_fast(req, walk);
+
+set_phys_lowmem:
+       if (err >= 0) {
+               walk->src.page = virt_to_page(src);
+               walk->dst.page = virt_to_page(dst);
+               walk->src.offset = ((unsigned long)src & (PAGE_SIZE - 1));
+               walk->dst.offset = ((unsigned long)dst & (PAGE_SIZE - 1));
+       }
+
+       return err;
+}
+
+static int ablkcipher_walk_first(struct ablkcipher_request *req,
+                                struct ablkcipher_walk *walk)
+{
+       struct crypto_tfm *tfm = req->base.tfm;
+       unsigned int alignmask;
+
+       alignmask = crypto_tfm_alg_alignmask(tfm);
+       if (WARN_ON_ONCE(in_irq()))
+               return -EDEADLK;
+
+       walk->nbytes = walk->total;
+       if (unlikely(!walk->total))
+               return 0;
+
+       walk->iv_buffer = NULL;
+       walk->iv = req->info;
+       if (unlikely(((unsigned long)walk->iv & alignmask))) {
+               int err = ablkcipher_copy_iv(walk, tfm, alignmask);
+               if (err)
+                       return err;
+       }
+
+       scatterwalk_start(&walk->in, walk->in.sg);
+       scatterwalk_start(&walk->out, walk->out.sg);
+
+       return ablkcipher_walk_next(req, walk);
+}
+
+int ablkcipher_walk_phys(struct ablkcipher_request *req,
+                        struct ablkcipher_walk *walk)
+{
+       walk->blocksize = crypto_tfm_alg_blocksize(req->base.tfm);
+       return ablkcipher_walk_first(req, walk);
+}
+EXPORT_SYMBOL_GPL(ablkcipher_walk_phys);
+
 static int setkey_unaligned(struct crypto_ablkcipher *tfm, const u8 *key,
                            unsigned int keylen)
 {

crypto/algapi.c
index 76fae27..c3cf1a6 100644
@@ -544,7 +544,7 @@ int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg,
 {
        int err = -EINVAL;
 
-       if (frontend && (alg->cra_flags ^ frontend->type) & frontend->maskset)
+       if ((alg->cra_flags ^ frontend->type) & frontend->maskset)
                goto out;
 
        spawn->frontend = frontend;

crypto/authenc.c
index 05eb32e..b9884ee 100644
@@ -181,6 +181,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq,
        struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
        struct authenc_request_ctx *areq_ctx = aead_request_ctx(req);
        struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff);
+       unsigned int cryptlen = req->cryptlen;
 
        if (err)
                goto out;
@@ -196,6 +197,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq,
                goto out;
 
        authsize = crypto_aead_authsize(authenc);
+       cryptlen -= authsize;
        ihash = ahreq->result + authsize;
        scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen,
                                 authsize, 0);
@@ -209,7 +211,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq,
        ablkcipher_request_set_callback(abreq, aead_request_flags(req),
                                        req->base.complete, req->base.data);
        ablkcipher_request_set_crypt(abreq, req->src, req->dst,
-                                    req->cryptlen, req->iv);
+                                    cryptlen, req->iv);
 
        err = crypto_ablkcipher_decrypt(abreq);
 
@@ -228,11 +230,13 @@ static void authenc_verify_ahash_done(struct crypto_async_request *areq,
        struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
        struct authenc_request_ctx *areq_ctx = aead_request_ctx(req);
        struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff);
+       unsigned int cryptlen = req->cryptlen;
 
        if (err)
                goto out;
 
        authsize = crypto_aead_authsize(authenc);
+       cryptlen -= authsize;
        ihash = ahreq->result + authsize;
        scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen,
                                 authsize, 0);
@@ -246,7 +250,7 @@ static void authenc_verify_ahash_done(struct crypto_async_request *areq,
        ablkcipher_request_set_callback(abreq, aead_request_flags(req),
                                        req->base.complete, req->base.data);
        ablkcipher_request_set_crypt(abreq, req->src, req->dst,
-                                    req->cryptlen, req->iv);
+                                    cryptlen, req->iv);
 
        err = crypto_ablkcipher_decrypt(abreq);
 

crypto/internal.h
index 2d22636..d4384b0 100644
@@ -6,7 +6,7 @@
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option) 
+ * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  *
  */

crypto/pcrypt.c
index 8020124..247178c 100644
@@ -315,16 +315,13 @@ out_free_inst:
        goto out;
 }
 
-static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb)
+static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb,
+                                                u32 type, u32 mask)
 {
        struct crypto_instance *inst;
        struct crypto_alg *alg;
-       struct crypto_attr_type *algt;
-
-       algt = crypto_get_attr_type(tb);
 
-       alg = crypto_get_attr_alg(tb, algt->type,
-                                 (algt->mask & CRYPTO_ALG_TYPE_MASK));
+       alg = crypto_get_attr_alg(tb, type, (mask & CRYPTO_ALG_TYPE_MASK));
        if (IS_ERR(alg))
                return ERR_CAST(alg);
 
@@ -365,7 +362,7 @@ static struct crypto_instance *pcrypt_alloc(struct rtattr **tb)
 
        switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
        case CRYPTO_ALG_TYPE_AEAD:
-               return pcrypt_alloc_aead(tb);
+               return pcrypt_alloc_aead(tb, algt->type, algt->mask);
        }
 
        return ERR_PTR(-EINVAL);

crypto/scatterwalk.c
index 3de89a4..41e529a 100644
@@ -68,7 +68,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
 
 void scatterwalk_done(struct scatter_walk *walk, int out, int more)
 {
-       if (!offset_in_page(walk->offset) || !more)
+       if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
                scatterwalk_pagedone(walk, out, more);
 }
 EXPORT_SYMBOL_GPL(scatterwalk_done);

crypto/shash.c
index 91f7b9d..22fd943 100644
@@ -37,7 +37,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
        u8 *buffer, *alignbuffer;
        int err;
 
-       absize = keylen + (alignmask & ~(CRYPTO_MINALIGN - 1));
+       absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
        buffer = kmalloc(absize, GFP_KERNEL);
        if (!buffer)
                return -ENOMEM;

crypto/tcrypt.c
index a351599..3ca68f9 100644
@@ -394,6 +394,17 @@ out:
        return 0;
 }
 
+static void test_hash_sg_init(struct scatterlist *sg)
+{
+       int i;
+
+       sg_init_table(sg, TVMEMSIZE);
+       for (i = 0; i < TVMEMSIZE; i++) {
+               sg_set_buf(sg + i, tvmem[i], PAGE_SIZE);
+               memset(tvmem[i], 0xff, PAGE_SIZE);
+       }
+}
+
 static void test_hash_speed(const char *algo, unsigned int sec,
                            struct hash_speed *speed)
 {
@@ -423,12 +434,7 @@ static void test_hash_speed(const char *algo, unsigned int sec,
                goto out;
        }
 
-       sg_init_table(sg, TVMEMSIZE);
-       for (i = 0; i < TVMEMSIZE; i++) {
-               sg_set_buf(sg + i, tvmem[i], PAGE_SIZE);
-               memset(tvmem[i], 0xff, PAGE_SIZE);
-       }
-
+       test_hash_sg_init(sg);
        for (i = 0; speed[i].blen != 0; i++) {
                if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
                        printk(KERN_ERR
@@ -437,6 +443,9 @@ static void test_hash_speed(const char *algo, unsigned int sec,
                        goto out;
                }
 
+               if (speed[i].klen)
+                       crypto_hash_setkey(tfm, tvmem[0], speed[i].klen);
+
                printk(KERN_INFO "test%3u "
                       "(%5u byte blocks,%5u bytes per update,%4u updates): ",
                       i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
@@ -458,6 +467,250 @@ out:
        crypto_free_hash(tfm);
 }
 
+struct tcrypt_result {
+       struct completion completion;
+       int err;
+};
+
+static void tcrypt_complete(struct crypto_async_request *req, int err)
+{
+       struct tcrypt_result *res = req->data;
+
+       if (err == -EINPROGRESS)
+               return;
+
+       res->err = err;
+       complete(&res->completion);
+}
+
+static inline int do_one_ahash_op(struct ahash_request *req, int ret)
+{
+       if (ret == -EINPROGRESS || ret == -EBUSY) {
+               struct tcrypt_result *tr = req->base.data;
+
+               ret = wait_for_completion_interruptible(&tr->completion);
+               if (!ret)
+                       ret = tr->err;
+               INIT_COMPLETION(tr->completion);
+       }
+       return ret;
+}
+
+static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
+                                    char *out, int sec)
+{
+       unsigned long start, end;
+       int bcount;
+       int ret;
+
+       for (start = jiffies, end = start + sec * HZ, bcount = 0;
+            time_before(jiffies, end); bcount++) {
+               ret = do_one_ahash_op(req, crypto_ahash_digest(req));
+               if (ret)
+                       return ret;
+       }
+
+       printk("%6u opers/sec, %9lu bytes/sec\n",
+              bcount / sec, ((long)bcount * blen) / sec);
+
+       return 0;
+}
+
+static int test_ahash_jiffies(struct ahash_request *req, int blen,
+                             int plen, char *out, int sec)
+{
+       unsigned long start, end;
+       int bcount, pcount;
+       int ret;
+
+       if (plen == blen)
+               return test_ahash_jiffies_digest(req, blen, out, sec);
+
+       for (start = jiffies, end = start + sec * HZ, bcount = 0;
+            time_before(jiffies, end); bcount++) {
+               ret = crypto_ahash_init(req);
+               if (ret)
+                       return ret;
+               for (pcount = 0; pcount < blen; pcount += plen) {
+                       ret = do_one_ahash_op(req, crypto_ahash_update(req));
+                       if (ret)
+                               return ret;
+               }
+               /* we assume there is enough space in 'out' for the result */
+               ret = do_one_ahash_op(req, crypto_ahash_final(req));
+               if (ret)
+                       return ret;
+       }
+
+       pr_cont("%6u opers/sec, %9lu bytes/sec\n",
+               bcount / sec, ((long)bcount * blen) / sec);
+
+       return 0;
+}
+
+static int test_ahash_cycles_digest(struct ahash_request *req, int blen,
+                                   char *out)
+{
+       unsigned long cycles = 0;
+       int ret, i;
+
+       /* Warm-up run. */
+       for (i = 0; i < 4; i++) {
+               ret = do_one_ahash_op(req, crypto_ahash_digest(req));
+               if (ret)
+                       goto out;
+       }
+
+       /* The real thing. */
+       for (i = 0; i < 8; i++) {
+               cycles_t start, end;
+
+               start = get_cycles();
+
+               ret = do_one_ahash_op(req, crypto_ahash_digest(req));
+               if (ret)
+                       goto out;
+
+               end = get_cycles();
+
+               cycles += end - start;
+       }
+
+out:
+       if (ret)
+               return ret;
+
+       pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
+               cycles / 8, cycles / (8 * blen));
+
+       return 0;
+}
+
+static int test_ahash_cycles(struct ahash_request *req, int blen,
+                            int plen, char *out)
+{
+       unsigned long cycles = 0;
+       int i, pcount, ret;
+
+       if (plen == blen)
+               return test_ahash_cycles_digest(req, blen, out);
+
+       /* Warm-up run. */
+       for (i = 0; i < 4; i++) {
+               ret = crypto_ahash_init(req);
+               if (ret)
+                       goto out;
+               for (pcount = 0; pcount < blen; pcount += plen) {
+                       ret = do_one_ahash_op(req, crypto_ahash_update(req));
+                       if (ret)
+                               goto out;
+               }
+               ret = do_one_ahash_op(req, crypto_ahash_final(req));
+               if (ret)
+                       goto out;
+       }
+
+       /* The real thing. */
+       for (i = 0; i < 8; i++) {
+               cycles_t start, end;
+
+               start = get_cycles();
+
+               ret = crypto_ahash_init(req);
+               if (ret)
+                       goto out;
+               for (pcount = 0; pcount < blen; pcount += plen) {
+                       ret = do_one_ahash_op(req, crypto_ahash_update(req));
+                       if (ret)
+                               goto out;
+               }
+               ret = do_one_ahash_op(req, crypto_ahash_final(req));
+               if (ret)
+                       goto out;
+
+               end = get_cycles();
+
+               cycles += end - start;
+       }
+
+out:
+       if (ret)
+               return ret;
+
+       pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
+               cycles / 8, cycles / (8 * blen));
+
+       return 0;
+}
+
+static void test_ahash_speed(const char *algo, unsigned int sec,
+                            struct hash_speed *speed)
+{
+       struct scatterlist sg[TVMEMSIZE];
+       struct tcrypt_result tresult;
+       struct ahash_request *req;
+       struct crypto_ahash *tfm;
+       static char output[1024];
+       int i, ret;
+
+       printk(KERN_INFO "\ntesting speed of async %s\n", algo);
+
+       tfm = crypto_alloc_ahash(algo, 0, 0);
+       if (IS_ERR(tfm)) {
+               pr_err("failed to load transform for %s: %ld\n",
+                      algo, PTR_ERR(tfm));
+               return;
+       }
+
+       if (crypto_ahash_digestsize(tfm) > sizeof(output)) {
+               pr_err("digestsize(%u) > outputbuffer(%zu)\n",
+                      crypto_ahash_digestsize(tfm), sizeof(output));
+               goto out;
+       }
+
+       test_hash_sg_init(sg);
+       req = ahash_request_alloc(tfm, GFP_KERNEL);
+       if (!req) {
+               pr_err("ahash request allocation failure\n");
+               goto out;
+       }
+
+       init_completion(&tresult.completion);
+       ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                  tcrypt_complete, &tresult);
+
+       for (i = 0; speed[i].blen != 0; i++) {
+               if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
+                       pr_err("template (%u) too big for tvmem (%lu)\n",
+                              speed[i].blen, TVMEMSIZE * PAGE_SIZE);
+                       break;
+               }
+
+               pr_info("test%3u "
+                       "(%5u byte blocks,%5u bytes per update,%4u updates): ",
+                       i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
+
+               ahash_request_set_crypt(req, sg, output, speed[i].plen);
+
+               if (sec)
+                       ret = test_ahash_jiffies(req, speed[i].blen,
+                                                speed[i].plen, output, sec);
+               else
+                       ret = test_ahash_cycles(req, speed[i].blen,
+                                               speed[i].plen, output);
+
+               if (ret) {
+                       pr_err("hashing failed ret=%d\n", ret);
+                       break;
+               }
+       }
+
+       ahash_request_free(req);
+
+out:
+       crypto_free_ahash(tfm);
+}
+
 static void test_available(void)
 {
        char **name = check;
@@ -881,9 +1134,87 @@ static int do_test(int m)
                test_hash_speed("rmd320", sec, generic_hash_speed_template);
                if (mode > 300 && mode < 400) break;
 
+       case 318:
+               test_hash_speed("ghash-generic", sec, hash_speed_template_16);
+               if (mode > 300 && mode < 400) break;
+
        case 399:
                break;
 
+       case 400:
+               /* fall through */
+
+       case 401:
+               test_ahash_speed("md4", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 402:
+               test_ahash_speed("md5", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 403:
+               test_ahash_speed("sha1", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 404:
+               test_ahash_speed("sha256", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 405:
+               test_ahash_speed("sha384", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 406:
+               test_ahash_speed("sha512", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 407:
+               test_ahash_speed("wp256", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 408:
+               test_ahash_speed("wp384", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 409:
+               test_ahash_speed("wp512", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 410:
+               test_ahash_speed("tgr128", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 411:
+               test_ahash_speed("tgr160", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 412:
+               test_ahash_speed("tgr192", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 413:
+               test_ahash_speed("sha224", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 414:
+               test_ahash_speed("rmd128", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 415:
+               test_ahash_speed("rmd160", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 416:
+               test_ahash_speed("rmd256", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 417:
+               test_ahash_speed("rmd320", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+
+       case 499:
+               break;
+
        case 1000:
                test_available();
                break;
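
The new 400-range cases mirror the 300-range synchronous hash speed tests but
run through the ahash interface, and case 318 adds a keyed test using
hash_speed_template_16 (16-byte key, set via crypto_hash_setkey above). In
practice these are driven by tcrypt's module parameters, so loading the module
with "mode=403 sec=1" should time async sha1 in one-second intervals, while
"mode=0" should still run the full self-test pass.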

crypto/tcrypt.h
index 966bbfa..10cb925 100644
@@ -25,6 +25,7 @@ struct cipher_speed_template {
 struct hash_speed {
        unsigned int blen;      /* buffer length */
        unsigned int plen;      /* per-update length */
+       unsigned int klen;      /* key length */
 };
 
 /*
@@ -83,4 +84,32 @@ static struct hash_speed generic_hash_speed_template[] = {
        {  .blen = 0,   .plen = 0, }
 };
 
+static struct hash_speed hash_speed_template_16[] = {
+       { .blen = 16,   .plen = 16,     .klen = 16, },
+       { .blen = 64,   .plen = 16,     .klen = 16, },
+       { .blen = 64,   .plen = 64,     .klen = 16, },
+       { .blen = 256,  .plen = 16,     .klen = 16, },
+       { .blen = 256,  .plen = 64,     .klen = 16, },
+       { .blen = 256,  .plen = 256,    .klen = 16, },
+       { .blen = 1024, .plen = 16,     .klen = 16, },
+       { .blen = 1024, .plen = 256,    .klen = 16, },
+       { .blen = 1024, .plen = 1024,   .klen = 16, },
+       { .blen = 2048, .plen = 16,     .klen = 16, },
+       { .blen = 2048, .plen = 256,    .klen = 16, },
+       { .blen = 2048, .plen = 1024,   .klen = 16, },
+       { .blen = 2048, .plen = 2048,   .klen = 16, },
+       { .blen = 4096, .plen = 16,     .klen = 16, },
+       { .blen = 4096, .plen = 256,    .klen = 16, },
+       { .blen = 4096, .plen = 1024,   .klen = 16, },
+       { .blen = 4096, .plen = 4096,   .klen = 16, },
+       { .blen = 8192, .plen = 16,     .klen = 16, },
+       { .blen = 8192, .plen = 256,    .klen = 16, },
+       { .blen = 8192, .plen = 1024,   .klen = 16, },
+       { .blen = 8192, .plen = 4096,   .klen = 16, },
+       { .blen = 8192, .plen = 8192,   .klen = 16, },
+
+       /* End marker */
+       {  .blen = 0,   .plen = 0,      .klen = 0, }
+};
+
 #endif /* _CRYPTO_TCRYPT_H */

crypto/testmgr.c
index c494d76..5c8aaa0 100644
@@ -153,8 +153,21 @@ static void testmgr_free_buf(char *buf[XBUFSIZE])
                free_page((unsigned long)buf[i]);
 }
 
+static int do_one_async_hash_op(struct ahash_request *req,
+                               struct tcrypt_result *tr,
+                               int ret)
+{
+       if (ret == -EINPROGRESS || ret == -EBUSY) {
+               ret = wait_for_completion_interruptible(&tr->completion);
+               if (!ret)
+                       ret = tr->err;
+               INIT_COMPLETION(tr->completion);
+       }
+       return ret;
+}
+
 static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
-                    unsigned int tcount)
+                    unsigned int tcount, bool use_digest)
 {
        const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm));
        unsigned int i, j, k, temp;
@@ -206,23 +219,36 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
                }
 
                ahash_request_set_crypt(req, sg, result, template[i].psize);
-               ret = crypto_ahash_digest(req);
-               switch (ret) {
-               case 0:
-                       break;
-               case -EINPROGRESS:
-               case -EBUSY:
-                       ret = wait_for_completion_interruptible(
-                               &tresult.completion);
-                       if (!ret && !(ret = tresult.err)) {
-                               INIT_COMPLETION(tresult.completion);
-                               break;
+               if (use_digest) {
+                       ret = do_one_async_hash_op(req, &tresult,
+                                                  crypto_ahash_digest(req));
+                       if (ret) {
+                               pr_err("alg: hash: digest failed on test %d "
+                                      "for %s: ret=%d\n", j, algo, -ret);
+                               goto out;
+                       }
+               } else {
+                       ret = do_one_async_hash_op(req, &tresult,
+                                                  crypto_ahash_init(req));
+                       if (ret) {
+                               pr_err("alt: hash: init failed on test %d "
+                                      "for %s: ret=%d\n", j, algo, -ret);
+                               goto out;
+                       }
+                       ret = do_one_async_hash_op(req, &tresult,
+                                                  crypto_ahash_update(req));
+                       if (ret) {
+                               pr_err("alt: hash: update failed on test %d "
+                                      "for %s: ret=%d\n", j, algo, -ret);
+                               goto out;
+                       }
+                       ret = do_one_async_hash_op(req, &tresult,
+                                                  crypto_ahash_final(req));
+                       if (ret) {
+                               pr_err("alt: hash: final failed on test %d "
+                                      "for %s: ret=%d\n", j, algo, -ret);
+                               goto out;
                        }
-                       /* fall through */
-               default:
-                       printk(KERN_ERR "alg: hash: digest failed on test %d "
-                              "for %s: ret=%d\n", j, algo, -ret);
-                       goto out;
                }
 
                if (memcmp(result, template[i].digest,
@@ -1402,7 +1428,11 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
                return PTR_ERR(tfm);
        }
 
-       err = test_hash(tfm, desc->suite.hash.vecs, desc->suite.hash.count);
+       err = test_hash(tfm, desc->suite.hash.vecs,
+                       desc->suite.hash.count, true);
+       if (!err)
+               err = test_hash(tfm, desc->suite.hash.vecs,
+                               desc->suite.hash.count, false);
 
        crypto_free_ahash(tfm);
        return err;

crypto/testmgr.h
index fb76517..74e3537 100644
@@ -1669,17 +1669,73 @@ static struct hash_testvec aes_xcbc128_tv_template[] = {
        }
 };
 
-#define VMAC_AES_TEST_VECTORS  1
-static char vmac_string[128] = {'\x01', '\x01', '\x01', '\x01',
+#define VMAC_AES_TEST_VECTORS  8
+static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01',
                                '\x02', '\x03', '\x02', '\x02',
                                '\x02', '\x04', '\x01', '\x07',
                                '\x04', '\x01', '\x04', '\x03',};
+static char vmac_string2[128] = {'a', 'b', 'c',};
+static char vmac_string3[128] = {'a', 'b', 'c', 'a', 'b', 'c',
+                               'a', 'b', 'c', 'a', 'b', 'c',
+                               'a', 'b', 'c', 'a', 'b', 'c',
+                               'a', 'b', 'c', 'a', 'b', 'c',
+                               'a', 'b', 'c', 'a', 'b', 'c',
+                               'a', 'b', 'c', 'a', 'b', 'c',
+                               'a', 'b', 'c', 'a', 'b', 'c',
+                               'a', 'b', 'c', 'a', 'b', 'c',
+                               };
+
 static struct hash_testvec aes_vmac128_tv_template[] = {
        {
+               .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
+                         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+               .plaintext = NULL,
+               .digest = "\x07\x58\x80\x35\x77\xa4\x7b\x54",
+               .psize  = 0,
+               .ksize  = 16,
+       }, {
+               .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
+                         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+               .plaintext = vmac_string1,
+               .digest = "\xce\xf5\x3c\xd3\xae\x68\x8c\xa1",
+               .psize  = 128,
+               .ksize  = 16,
+       }, {
+               .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
+                         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+               .plaintext = vmac_string2,
+               .digest = "\xc9\x27\xb0\x73\x81\xbd\x14\x2d",
+               .psize  = 128,
+               .ksize  = 16,
+       }, {
                .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
                          "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-               .plaintext = vmac_string,
-               .digest = "\xcb\xd7\x8a\xfd\xb7\x33\x79\xe7",
+               .plaintext = vmac_string3,
+               .digest = "\x8d\x1a\x95\x8c\x98\x47\x0b\x19",
+               .psize  = 128,
+               .ksize  = 16,
+       }, {
+               .key    = "abcdefghijklmnop",
+               .plaintext = NULL,
+               .digest = "\x3b\x89\xa1\x26\x9e\x55\x8f\x84",
+               .psize  = 0,
+               .ksize  = 16,
+       }, {
+               .key    = "abcdefghijklmnop",
+               .plaintext = vmac_string1,
+               .digest = "\xab\x5e\xab\xb0\xf6\x8d\x74\xc2",
+               .psize  = 128,
+               .ksize  = 16,
+       }, {
+               .key    = "abcdefghijklmnop",
+               .plaintext = vmac_string2,
+               .digest = "\x11\x15\x68\x42\x3d\x7b\x09\xdf",
+               .psize  = 128,
+               .ksize  = 16,
+       }, {
+               .key    = "abcdefghijklmnop",
+               .plaintext = vmac_string3,
+               .digest = "\x8b\x32\x8f\xe1\xed\x8f\xfa\xd4",
                .psize  = 128,
                .ksize  = 16,
        },
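
The VMAC(AES) suite above grows from one vector to eight: the empty message, the original 16-byte pattern, a short "abc" input and a repeated "abc" block (each zero-padded to 128 bytes), all under two different 128-bit keys. A minimal, hedged sketch of checking one of these vectors by hand through the synchronous hash API, using the same on-stack descriptor pattern that appears elsewhere in this merge (error handling trimmed):

        struct crypto_shash *tfm = crypto_alloc_shash("vmac(aes)", 0, 0);

        if (!IS_ERR(tfm) &&
            !crypto_shash_setkey(tfm, (u8 *)"abcdefghijklmnop", 16)) {
                struct {
                        struct shash_desc shash;
                        char ctx[crypto_shash_descsize(tfm)];
                } desc;
                u8 out[8];

                desc.shash.tfm = tfm;
                desc.shash.flags = 0;
                crypto_shash_digest(&desc.shash, (u8 *)vmac_string2, 128, out);
                /* out should match "\x11\x15\x68\x42\x3d\x7b\x09\xdf" above */
        }
        if (!IS_ERR(tfm))
                crypto_free_shash(tfm);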
index 0a9468e..0999274 100644 (file)
@@ -43,6 +43,8 @@ const u64 m63   = UINT64_C(0x7fffffffffffffff);  /* 63-bit mask       */
 const u64 m64   = UINT64_C(0xffffffffffffffff);  /* 64-bit mask       */
 const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
 
+#define pe64_to_cpup le64_to_cpup              /* Prefer little endian */
+
 #ifdef __LITTLE_ENDIAN
 #define INDEX_HIGH 1
 #define INDEX_LOW 0
@@ -110,8 +112,8 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
                int i; u64 th, tl;                                      \
                rh = rl = 0;                                            \
                for (i = 0; i < nw; i += 2) {                           \
-                       MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],     \
-                               le64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],     \
+                               pe64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
                        ADD128(rh, rl, th, tl);                         \
                }                                                       \
        } while (0)
@@ -121,11 +123,11 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
                int i; u64 th, tl;                                      \
                rh1 = rl1 = rh = rl = 0;                                \
                for (i = 0; i < nw; i += 2) {                           \
-                       MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],     \
-                               le64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],     \
+                               pe64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2],   \
-                               le64_to_cpup((mp)+i+1)+(kp)[i+3]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2],   \
+                               pe64_to_cpup((mp)+i+1)+(kp)[i+3]);      \
                        ADD128(rh1, rl1, th, tl);                       \
                }                                                       \
        } while (0)
@@ -136,17 +138,17 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
                int i; u64 th, tl;                                      \
                rh = rl = 0;                                            \
                for (i = 0; i < nw; i += 8) {                           \
-                       MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],     \
-                               le64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],     \
+                               pe64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2], \
-                               le64_to_cpup((mp)+i+3)+(kp)[i+3]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \
+                               pe64_to_cpup((mp)+i+3)+(kp)[i+3]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4], \
-                               le64_to_cpup((mp)+i+5)+(kp)[i+5]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \
+                               pe64_to_cpup((mp)+i+5)+(kp)[i+5]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6], \
-                               le64_to_cpup((mp)+i+7)+(kp)[i+7]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \
+                               pe64_to_cpup((mp)+i+7)+(kp)[i+7]);      \
                        ADD128(rh, rl, th, tl);                         \
                }                                                       \
        } while (0)
@@ -156,29 +158,29 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
                int i; u64 th, tl;                                      \
                rh1 = rl1 = rh = rl = 0;                                \
                for (i = 0; i < nw; i += 8) {                           \
-                       MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],     \
-                               le64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],     \
+                               pe64_to_cpup((mp)+i+1)+(kp)[i+1]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2],   \
-                               le64_to_cpup((mp)+i+1)+(kp)[i+3]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2],   \
+                               pe64_to_cpup((mp)+i+1)+(kp)[i+3]);      \
                        ADD128(rh1, rl1, th, tl);                       \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2], \
-                               le64_to_cpup((mp)+i+3)+(kp)[i+3]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \
+                               pe64_to_cpup((mp)+i+3)+(kp)[i+3]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+4], \
-                               le64_to_cpup((mp)+i+3)+(kp)[i+5]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+4], \
+                               pe64_to_cpup((mp)+i+3)+(kp)[i+5]);      \
                        ADD128(rh1, rl1, th, tl);                       \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4], \
-                               le64_to_cpup((mp)+i+5)+(kp)[i+5]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \
+                               pe64_to_cpup((mp)+i+5)+(kp)[i+5]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+6], \
-                               le64_to_cpup((mp)+i+5)+(kp)[i+7]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+6], \
+                               pe64_to_cpup((mp)+i+5)+(kp)[i+7]);      \
                        ADD128(rh1, rl1, th, tl);                       \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6], \
-                               le64_to_cpup((mp)+i+7)+(kp)[i+7]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \
+                               pe64_to_cpup((mp)+i+7)+(kp)[i+7]);      \
                        ADD128(rh, rl, th, tl);                         \
-                       MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+8], \
-                               le64_to_cpup((mp)+i+7)+(kp)[i+9]);      \
+                       MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+8], \
+                               pe64_to_cpup((mp)+i+7)+(kp)[i+9]);      \
                        ADD128(rh1, rl1, th, tl);                       \
                }                                                       \
        } while (0)
@@ -216,8 +218,8 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
                int i;                                                  \
                rh = rl = t = 0;                                        \
                for (i = 0; i < nw; i += 2)  {                          \
-                       t1 = le64_to_cpup(mp+i) + kp[i];                \
-                       t2 = le64_to_cpup(mp+i+1) + kp[i+1];            \
+                       t1 = pe64_to_cpup(mp+i) + kp[i];                \
+                       t2 = pe64_to_cpup(mp+i+1) + kp[i+1];            \
                        m2 = MUL32(t1 >> 32, t2);                       \
                        m1 = MUL32(t1, t2 >> 32);                       \
                        ADD128(rh, rl, MUL32(t1 >> 32, t2 >> 32),       \
@@ -322,8 +324,7 @@ static void vhash_abort(struct vmac_ctx *ctx)
        ctx->first_block_processed = 0;
 }
 
-static u64 l3hash(u64 p1, u64 p2,
-                       u64 k1, u64 k2, u64 len)
+static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 {
        u64 rh, rl, t, z = 0;
 
@@ -474,7 +475,7 @@ static u64 vmac(unsigned char m[], unsigned int mbytes,
        }
        p = be64_to_cpup(out_p + i);
        h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx);
-       return p + h;
+       return le64_to_cpu(p + h);
 }
 
 static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx)
@@ -549,10 +550,6 @@ static int vmac_setkey(struct crypto_shash *parent,
 
 static int vmac_init(struct shash_desc *pdesc)
 {
-       struct crypto_shash *parent = pdesc->tfm;
-       struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-
-       memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
        return 0;
 }
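
Two behavioural notes on the vmac.c hunks above, read from the diff rather than from a changelog: pe64_to_cpup is a plain alias for le64_to_cpup, making explicit that message words are always consumed little-endian regardless of host order, and the final tag is now passed through le64_to_cpu so the returned 64-bit MAC keeps a fixed byte order on big-endian machines too. Dropping the memset from vmac_init() also leaves whatever vmac_set_key() placed in the tfm context intact across a fresh init/update/final cycle. A minimal sketch of that usage pattern with the shash API (descriptor setup and error checks elided):

        crypto_shash_setkey(tfm, key, 16);      /* key-derived state lives in the tfm ctx */
        crypto_shash_init(desc);                /* must no longer wipe that state         */
        crypto_shash_update(desc, msg, len);
        crypto_shash_final(desc, mac);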
 
index 2fd3d39..8d85587 100644 (file)
 #define INPUT_POOL_WORDS 128
 #define OUTPUT_POOL_WORDS 32
 #define SEC_XFER_SIZE 512
+#define EXTRACT_SIZE 10
 
 /*
  * The minimum number of bits of entropy before we wake up a read on
@@ -414,7 +415,7 @@ struct entropy_store {
        unsigned add_ptr;
        int entropy_count;
        int input_rotate;
-       __u8 *last_data;
+       __u8 last_data[EXTRACT_SIZE];
 };
 
 static __u32 input_pool_data[INPUT_POOL_WORDS];
@@ -714,8 +715,6 @@ void add_disk_randomness(struct gendisk *disk)
 }
 #endif
 
-#define EXTRACT_SIZE 10
-
 /*********************************************************************
  *
  * Entropy extraction routines
@@ -862,7 +861,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf,
        while (nbytes) {
                extract_buf(r, tmp);
 
-               if (r->last_data) {
+               if (fips_enabled) {
                        spin_lock_irqsave(&r->lock, flags);
                        if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
                                panic("Hardware RNG duplicated output!\n");
@@ -951,9 +950,6 @@ static void init_std_data(struct entropy_store *r)
        now = ktime_get_real();
        mix_pool_bytes(r, &now, sizeof(now));
        mix_pool_bytes(r, utsname(), sizeof(*(utsname())));
-       /* Enable continuous test in fips mode */
-       if (fips_enabled)
-               r->last_data = kmalloc(EXTRACT_SIZE, GFP_KERNEL);
 }
 
 static int rand_initialize(void)
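
The random.c hunks above fold the FIPS continuous self-test into a fixed last_data[] array inside the pool, so no allocation (and no NULL check) is needed and the test is gated directly on fips_enabled. For context, the full check in extract_entropy() compares and then saves each block; a hedged reconstruction, of which only the first half appears in the hunk above:

        if (fips_enabled) {
                spin_lock_irqsave(&r->lock, flags);
                if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
                        panic("Hardware RNG duplicated output!\n");
                memcpy(r->last_data, tmp, EXTRACT_SIZE);
                spin_unlock_irqrestore(&r->lock, flags);
        }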
index b08403d..fbf94cf 100644 (file)
@@ -170,6 +170,18 @@ config CRYPTO_DEV_MV_CESA
 
          Currently the driver supports AES in ECB and CBC mode without DMA.
 
+config CRYPTO_DEV_NIAGARA2
+       tristate "Niagara2 Stream Processing Unit driver"
+       select CRYPTO_ALGAPI
+       depends on SPARC64
+       help
+         Each core of a Niagara2 processor contains a Stream
+         Processing Unit, which itself contains several cryptographic
+         sub-units.  One set provides the Modular Arithmetic Unit,
+         used for SSL offload.  The other set provides the Cipher
+         Group, which can perform encryption, decryption, hashing,
+         checksumming, and raw copies.
+
 config CRYPTO_DEV_HIFN_795X
        tristate "Driver HIFN 795x crypto accelerator chips"
        select CRYPTO_DES
@@ -222,4 +234,13 @@ config CRYPTO_DEV_PPC4XX
        help
          This option allows you to have support for AMCC crypto acceleration.
 
+config CRYPTO_DEV_OMAP_SHAM
+       tristate "Support for OMAP SHA1/MD5 hw accelerator"
+       depends on ARCH_OMAP2 || ARCH_OMAP3
+       select CRYPTO_SHA1
+       select CRYPTO_MD5
+       help
+         OMAP processors have SHA1/MD5 hw accelerator. Select this if you
+         want to use the OMAP module for SHA1/MD5 algorithms.
+
 endif # CRYPTO_HW
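
The two new entries above make the drivers selectable like any other option under CRYPTO_HW, subject to the SPARC64 and OMAP2/OMAP3 dependencies they declare. A minimal config fragment for module builds (illustrative only):

        CONFIG_CRYPTO_HW=y
        CONFIG_CRYPTO_DEV_NIAGARA2=m
        CONFIG_CRYPTO_DEV_OMAP_SHAM=m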
index 6ffcb3f..6dbbe00 100644 (file)
@@ -1,8 +1,12 @@
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
+obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
+n2_crypto-objs := n2_core.o n2_asm.o
 obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
 obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
+obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
+
index c7a5a43..09389dd 100644 (file)
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
 
-#include <asm/io.h>
-#include <asm/delay.h>
+#include <linux/io.h>
+#include <linux/delay.h>
 
 #include "geode-aes.h"
 
 /* Static structures */
 
-static void __iomem * _iobase;
+static void __iomem *_iobase;
 static spinlock_t lock;
 
 /* Write a 128 bit field (either a writable key or IV) */
@@ -30,7 +30,7 @@ static inline void
 _writefield(u32 offset, void *value)
 {
        int i;
-       for(i = 0; i < 4; i++)
+       for (i = 0; i < 4; i++)
                iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4));
 }
 
@@ -39,7 +39,7 @@ static inline void
 _readfield(u32 offset, void *value)
 {
        int i;
-       for(i = 0; i < 4; i++)
+       for (i = 0; i < 4; i++)
                ((u32 *) value)[i] = ioread32(_iobase + offset + (i * 4));
 }
 
@@ -59,7 +59,7 @@ do_crypt(void *src, void *dst, int len, u32 flags)
        do {
                status = ioread32(_iobase + AES_INTR_REG);
                cpu_relax();
-       } while(!(status & AES_INTRA_PENDING) && --counter);
+       } while (!(status & AES_INTRA_PENDING) && --counter);
 
        /* Clear the event */
        iowrite32((status & 0xFF) | AES_INTRA_PENDING, _iobase + AES_INTR_REG);
@@ -317,7 +317,7 @@ geode_cbc_decrypt(struct blkcipher_desc *desc,
        err = blkcipher_walk_virt(desc, &walk);
        op->iv = walk.iv;
 
-       while((nbytes = walk.nbytes)) {
+       while ((nbytes = walk.nbytes)) {
                op->src = walk.src.virt.addr,
                op->dst = walk.dst.virt.addr;
                op->mode = AES_MODE_CBC;
@@ -349,7 +349,7 @@ geode_cbc_encrypt(struct blkcipher_desc *desc,
        err = blkcipher_walk_virt(desc, &walk);
        op->iv = walk.iv;
 
-       while((nbytes = walk.nbytes)) {
+       while ((nbytes = walk.nbytes)) {
                op->src = walk.src.virt.addr,
                op->dst = walk.dst.virt.addr;
                op->mode = AES_MODE_CBC;
@@ -429,7 +429,7 @@ geode_ecb_decrypt(struct blkcipher_desc *desc,
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
 
-       while((nbytes = walk.nbytes)) {
+       while ((nbytes = walk.nbytes)) {
                op->src = walk.src.virt.addr,
                op->dst = walk.dst.virt.addr;
                op->mode = AES_MODE_ECB;
@@ -459,7 +459,7 @@ geode_ecb_encrypt(struct blkcipher_desc *desc,
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
 
-       while((nbytes = walk.nbytes)) {
+       while ((nbytes = walk.nbytes)) {
                op->src = walk.src.virt.addr,
                op->dst = walk.dst.virt.addr;
                op->mode = AES_MODE_ECB;
@@ -518,11 +518,12 @@ static int __devinit
 geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
        int ret;
-
-       if ((ret = pci_enable_device(dev)))
+       ret = pci_enable_device(dev);
+       if (ret)
                return ret;
 
-       if ((ret = pci_request_regions(dev, "geode-aes")))
+       ret = pci_request_regions(dev, "geode-aes");
+       if (ret)
                goto eenable;
 
        _iobase = pci_iomap(dev, 0, 0);
@@ -537,13 +538,16 @@ geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id)
        /* Clear any pending activity */
        iowrite32(AES_INTR_PENDING | AES_INTR_MASK, _iobase + AES_INTR_REG);
 
-       if ((ret = crypto_register_alg(&geode_alg)))
+       ret = crypto_register_alg(&geode_alg);
+       if (ret)
                goto eiomap;
 
-       if ((ret = crypto_register_alg(&geode_ecb_alg)))
+       ret = crypto_register_alg(&geode_ecb_alg);
+       if (ret)
                goto ealg;
 
-       if ((ret = crypto_register_alg(&geode_cbc_alg)))
+       ret = crypto_register_alg(&geode_cbc_alg);
+       if (ret)
                goto eecb;
 
        printk(KERN_NOTICE "geode-aes: GEODE AES engine enabled.\n");
index 73e8b17..16fce3a 100644 (file)
@@ -638,7 +638,7 @@ struct hifn_crypto_alg
 
 #define ASYNC_FLAGS_MISALIGNED (1<<0)
 
-struct ablkcipher_walk
+struct hifn_cipher_walk
 {
        struct scatterlist      cache[ASYNC_SCATTERLIST_CACHE];
        u32                     flags;
@@ -657,7 +657,7 @@ struct hifn_request_context
        u8                      *iv;
        unsigned int            ivsize;
        u8                      op, type, mode, unused;
-       struct ablkcipher_walk  walk;
+       struct hifn_cipher_walk walk;
 };
 
 #define crypto_alg_to_hifn(a)  container_of(a, struct hifn_crypto_alg, alg)
@@ -1417,7 +1417,7 @@ static int hifn_setup_dma(struct hifn_device *dev,
        return 0;
 }
 
-static int ablkcipher_walk_init(struct ablkcipher_walk *w,
+static int hifn_cipher_walk_init(struct hifn_cipher_walk *w,
                int num, gfp_t gfp_flags)
 {
        int i;
@@ -1442,7 +1442,7 @@ static int ablkcipher_walk_init(struct ablkcipher_walk *w,
        return i;
 }
 
-static void ablkcipher_walk_exit(struct ablkcipher_walk *w)
+static void hifn_cipher_walk_exit(struct hifn_cipher_walk *w)
 {
        int i;
 
@@ -1486,8 +1486,8 @@ static int ablkcipher_add(unsigned int *drestp, struct scatterlist *dst,
        return idx;
 }
 
-static int ablkcipher_walk(struct ablkcipher_request *req,
-               struct ablkcipher_walk *w)
+static int hifn_cipher_walk(struct ablkcipher_request *req,
+               struct hifn_cipher_walk *w)
 {
        struct scatterlist *dst, *t;
        unsigned int nbytes = req->nbytes, offset, copy, diff;
@@ -1600,12 +1600,12 @@ static int hifn_setup_session(struct ablkcipher_request *req)
        }
 
        if (rctx->walk.flags & ASYNC_FLAGS_MISALIGNED) {
-               err = ablkcipher_walk_init(&rctx->walk, idx, GFP_ATOMIC);
+               err = hifn_cipher_walk_init(&rctx->walk, idx, GFP_ATOMIC);
                if (err < 0)
                        return err;
        }
 
-       sg_num = ablkcipher_walk(req, &rctx->walk);
+       sg_num = hifn_cipher_walk(req, &rctx->walk);
        if (sg_num < 0) {
                err = sg_num;
                goto err_out_exit;
@@ -1806,7 +1806,7 @@ static void hifn_process_ready(struct ablkcipher_request *req, int error)
                        kunmap_atomic(saddr, KM_SOFTIRQ0);
                }
 
-               ablkcipher_walk_exit(&rctx->walk);
+               hifn_cipher_walk_exit(&rctx->walk);
        }
 
        req->base.complete(&req->base, error);
index 6f29012..e095422 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
 
 #include "mv_cesa.h"
+
+#define MV_CESA        "MV-CESA:"
+#define MAX_HW_HASH_SIZE       0xFFFF
+
 /*
  * STM:
  *   /---------------------------------------\
@@ -39,10 +45,12 @@ enum engine_status {
  * @dst_sg_it:         sg iterator for dst
  * @sg_src_left:       bytes left in src to process (scatter list)
  * @src_start:         offset to add to src start position (scatter list)
- * @crypt_len:         length of current crypt process
+ * @crypt_len:         length of current hw crypt/hash process
+ * @hw_nbytes:         total bytes to process in hw for this request
+ * @copy_back:         whether to copy data back (crypt) or not (hash)
  * @sg_dst_left:       bytes left dst to process in this scatter list
  * @dst_start:         offset to add to dst start position (scatter list)
- * @total_req_bytes:   total number of bytes processed (request).
+ * @hw_processed_bytes:        number of bytes processed by hw (request).
  *
  * sg helper are used to iterate over the scatterlist. Since the size of the
  * SRAM may be less than the scatter size, this struct struct is used to keep
@@ -51,15 +59,19 @@ enum engine_status {
 struct req_progress {
        struct sg_mapping_iter src_sg_it;
        struct sg_mapping_iter dst_sg_it;
+       void (*complete) (void);
+       void (*process) (int is_first);
 
        /* src mostly */
        int sg_src_left;
        int src_start;
        int crypt_len;
+       int hw_nbytes;
        /* dst mostly */
+       int copy_back;
        int sg_dst_left;
        int dst_start;
-       int total_req_bytes;
+       int hw_processed_bytes;
 };
 
 struct crypto_priv {
@@ -72,10 +84,12 @@ struct crypto_priv {
        spinlock_t lock;
        struct crypto_queue queue;
        enum engine_status eng_st;
-       struct ablkcipher_request *cur_req;
+       struct crypto_async_request *cur_req;
        struct req_progress p;
        int max_req_size;
        int sram_size;
+       int has_sha1;
+       int has_hmac_sha1;
 };
 
 static struct crypto_priv *cpg;
@@ -97,6 +111,31 @@ struct mv_req_ctx {
        int decrypt;
 };
 
+enum hash_op {
+       COP_SHA1,
+       COP_HMAC_SHA1
+};
+
+struct mv_tfm_hash_ctx {
+       struct crypto_shash *fallback;
+       struct crypto_shash *base_hash;
+       u32 ivs[2 * SHA1_DIGEST_SIZE / 4];
+       int count_add;
+       enum hash_op op;
+};
+
+struct mv_req_hash_ctx {
+       u64 count;
+       u32 state[SHA1_DIGEST_SIZE / 4];
+       u8 buffer[SHA1_BLOCK_SIZE];
+       int first_hash;         /* marks that we don't have previous state */
+       int last_chunk;         /* marks that this is the 'final' request */
+       int extra_bytes;        /* unprocessed bytes in buffer */
+       enum hash_op op;
+       int count_add;
+       struct scatterlist dummysg;
+};
+
 static void compute_aes_dec_key(struct mv_ctx *ctx)
 {
        struct crypto_aes_ctx gen_aes_key;
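
The new mv_req_hash_ctx above is the per-request bookkeeping for hardware hashing: count accumulates the total message length, state[] snapshots the five SHA-1 words between passes, and buffer/extra_bytes hold whatever does not yet fill a complete 64-byte block. A hedged sketch of the splitting arithmetic that shows up later in mv_start_new_hash_req() (names from this file, control flow simplified):

        hw_bytes = req->nbytes + ctx->extra_bytes;      /* new data plus buffered tail    */
        ctx->extra_bytes = hw_bytes % SHA1_BLOCK_SIZE;  /* remainder stays in ctx->buffer */
        if (ctx->extra_bytes && !ctx->last_chunk)
                hw_bytes -= ctx->extra_bytes;           /* only whole blocks hit the engine */
        else
                ctx->extra_bytes = 0;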
@@ -144,32 +183,51 @@ static int mv_setkey_aes(struct crypto_ablkcipher *cipher, const u8 *key,
        return 0;
 }
 
-static void setup_data_in(struct ablkcipher_request *req)
+static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len)
 {
        int ret;
-       void *buf;
+       void *sbuf;
+       int copied = 0;
 
-       if (!cpg->p.sg_src_left) {
-               ret = sg_miter_next(&cpg->p.src_sg_it);
-               BUG_ON(!ret);
-               cpg->p.sg_src_left = cpg->p.src_sg_it.length;
-               cpg->p.src_start = 0;
-       }
-
-       cpg->p.crypt_len = min(cpg->p.sg_src_left, cpg->max_req_size);
-
-       buf = cpg->p.src_sg_it.addr;
-       buf += cpg->p.src_start;
+       while (1) {
+               if (!p->sg_src_left) {
+                       ret = sg_miter_next(&p->src_sg_it);
+                       BUG_ON(!ret);
+                       p->sg_src_left = p->src_sg_it.length;
+                       p->src_start = 0;
+               }
 
-       memcpy(cpg->sram + SRAM_DATA_IN_START, buf, cpg->p.crypt_len);
+               sbuf = p->src_sg_it.addr + p->src_start;
+
+               if (p->sg_src_left <= len - copied) {
+                       memcpy(dbuf + copied, sbuf, p->sg_src_left);
+                       copied += p->sg_src_left;
+                       p->sg_src_left = 0;
+                       if (copied >= len)
+                               break;
+               } else {
+                       int copy_len = len - copied;
+                       memcpy(dbuf + copied, sbuf, copy_len);
+                       p->src_start += copy_len;
+                       p->sg_src_left -= copy_len;
+                       break;
+               }
+       }
+}
 
-       cpg->p.sg_src_left -= cpg->p.crypt_len;
-       cpg->p.src_start += cpg->p.crypt_len;
+static void setup_data_in(void)
+{
+       struct req_progress *p = &cpg->p;
+       int data_in_sram =
+           min(p->hw_nbytes - p->hw_processed_bytes, cpg->max_req_size);
+       copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START + p->crypt_len,
+                       data_in_sram - p->crypt_len);
+       p->crypt_len = data_in_sram;
 }
 
 static void mv_process_current_q(int first_block)
 {
-       struct ablkcipher_request *req = cpg->cur_req;
+       struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
        struct mv_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
        struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
        struct sec_accel_config op;
@@ -179,6 +237,7 @@ static void mv_process_current_q(int first_block)
                op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB;
                break;
        case COP_AES_CBC:
+       default:
                op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC;
                op.enc_iv = ENC_IV_POINT(SRAM_DATA_IV) |
                        ENC_IV_BUF_POINT(SRAM_DATA_IV_BUF);
@@ -211,7 +270,7 @@ static void mv_process_current_q(int first_block)
                ENC_P_DST(SRAM_DATA_OUT_START);
        op.enc_key_p = SRAM_DATA_KEY_P;
 
-       setup_data_in(req);
+       setup_data_in();
        op.enc_len = cpg->p.crypt_len;
        memcpy(cpg->sram + SRAM_CONFIG, &op,
                        sizeof(struct sec_accel_config));
@@ -228,91 +287,294 @@ static void mv_process_current_q(int first_block)
 
 static void mv_crypto_algo_completion(void)
 {
-       struct ablkcipher_request *req = cpg->cur_req;
+       struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
        struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
 
+       sg_miter_stop(&cpg->p.src_sg_it);
+       sg_miter_stop(&cpg->p.dst_sg_it);
+
        if (req_ctx->op != COP_AES_CBC)
                return ;
 
        memcpy(req->info, cpg->sram + SRAM_DATA_IV_BUF, 16);
 }
 
+static void mv_process_hash_current(int first_block)
+{
+       struct ahash_request *req = ahash_request_cast(cpg->cur_req);
+       struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
+       struct req_progress *p = &cpg->p;
+       struct sec_accel_config op = { 0 };
+       int is_last;
+
+       switch (req_ctx->op) {
+       case COP_SHA1:
+       default:
+               op.config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
+               break;
+       case COP_HMAC_SHA1:
+               op.config = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
+               break;
+       }
+
+       op.mac_src_p =
+               MAC_SRC_DATA_P(SRAM_DATA_IN_START) | MAC_SRC_TOTAL_LEN((u32)
+               req_ctx->
+               count);
+
+       setup_data_in();
+
+       op.mac_digest =
+               MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len);
+       op.mac_iv =
+               MAC_INNER_IV_P(SRAM_HMAC_IV_IN) |
+               MAC_OUTER_IV_P(SRAM_HMAC_IV_OUT);
+
+       is_last = req_ctx->last_chunk
+               && (p->hw_processed_bytes + p->crypt_len >= p->hw_nbytes)
+               && (req_ctx->count <= MAX_HW_HASH_SIZE);
+       if (req_ctx->first_hash) {
+               if (is_last)
+                       op.config |= CFG_NOT_FRAG;
+               else
+                       op.config |= CFG_FIRST_FRAG;
+
+               req_ctx->first_hash = 0;
+       } else {
+               if (is_last)
+                       op.config |= CFG_LAST_FRAG;
+               else
+                       op.config |= CFG_MID_FRAG;
+       }
+
+       memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config));
+
+       writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0);
+       /* GO */
+       writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
+
+       /*
+       * XXX: add timer if the interrupt does not occur for some mystery
+       * reason
+       */
+}
+
+static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx,
+                                         struct shash_desc *desc)
+{
+       int i;
+       struct sha1_state shash_state;
+
+       shash_state.count = ctx->count + ctx->count_add;
+       for (i = 0; i < 5; i++)
+               shash_state.state[i] = ctx->state[i];
+       memcpy(shash_state.buffer, ctx->buffer, sizeof(shash_state.buffer));
+       return crypto_shash_import(desc, &shash_state);
+}
+
+static int mv_hash_final_fallback(struct ahash_request *req)
+{
+       const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+       struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
+       struct {
+               struct shash_desc shash;
+               char ctx[crypto_shash_descsize(tfm_ctx->fallback)];
+       } desc;
+       int rc;
+
+       desc.shash.tfm = tfm_ctx->fallback;
+       desc.shash.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+       if (unlikely(req_ctx->first_hash)) {
+               crypto_shash_init(&desc.shash);
+               crypto_shash_update(&desc.shash, req_ctx->buffer,
+                                   req_ctx->extra_bytes);
+       } else {
+               /* only SHA1 for now....
+                */
+               rc = mv_hash_import_sha1_ctx(req_ctx, &desc.shash);
+               if (rc)
+                       goto out;
+       }
+       rc = crypto_shash_final(&desc.shash, req->result);
+out:
+       return rc;
+}
+
+static void mv_hash_algo_completion(void)
+{
+       struct ahash_request *req = ahash_request_cast(cpg->cur_req);
+       struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
+
+       if (ctx->extra_bytes)
+               copy_src_to_buf(&cpg->p, ctx->buffer, ctx->extra_bytes);
+       sg_miter_stop(&cpg->p.src_sg_it);
+
+       ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
+       ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
+       ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
+       ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
+       ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
+
+       if (likely(ctx->last_chunk)) {
+               if (likely(ctx->count <= MAX_HW_HASH_SIZE)) {
+                       memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF,
+                              crypto_ahash_digestsize(crypto_ahash_reqtfm
+                                                      (req)));
+               } else
+                       mv_hash_final_fallback(req);
+       }
+}
+
 static void dequeue_complete_req(void)
 {
-       struct ablkcipher_request *req = cpg->cur_req;
+       struct crypto_async_request *req = cpg->cur_req;
        void *buf;
        int ret;
+       cpg->p.hw_processed_bytes += cpg->p.crypt_len;
+       if (cpg->p.copy_back) {
+               int need_copy_len = cpg->p.crypt_len;
+               int sram_offset = 0;
+               do {
+                       int dst_copy;
+
+                       if (!cpg->p.sg_dst_left) {
+                               ret = sg_miter_next(&cpg->p.dst_sg_it);
+                               BUG_ON(!ret);
+                               cpg->p.sg_dst_left = cpg->p.dst_sg_it.length;
+                               cpg->p.dst_start = 0;
+                       }
 
-       cpg->p.total_req_bytes += cpg->p.crypt_len;
-       do {
-               int dst_copy;
-
-               if (!cpg->p.sg_dst_left) {
-                       ret = sg_miter_next(&cpg->p.dst_sg_it);
-                       BUG_ON(!ret);
-                       cpg->p.sg_dst_left = cpg->p.dst_sg_it.length;
-                       cpg->p.dst_start = 0;
-               }
-
-               buf = cpg->p.dst_sg_it.addr;
-               buf += cpg->p.dst_start;
+                       buf = cpg->p.dst_sg_it.addr;
+                       buf += cpg->p.dst_start;
 
-               dst_copy = min(cpg->p.crypt_len, cpg->p.sg_dst_left);
+                       dst_copy = min(need_copy_len, cpg->p.sg_dst_left);
 
-               memcpy(buf, cpg->sram + SRAM_DATA_OUT_START, dst_copy);
+                       memcpy(buf,
+                              cpg->sram + SRAM_DATA_OUT_START + sram_offset,
+                              dst_copy);
+                       sram_offset += dst_copy;
+                       cpg->p.sg_dst_left -= dst_copy;
+                       need_copy_len -= dst_copy;
+                       cpg->p.dst_start += dst_copy;
+               } while (need_copy_len > 0);
+       }
 
-               cpg->p.sg_dst_left -= dst_copy;
-               cpg->p.crypt_len -= dst_copy;
-               cpg->p.dst_start += dst_copy;
-       } while (cpg->p.crypt_len > 0);
+       cpg->p.crypt_len = 0;
 
        BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE);
-       if (cpg->p.total_req_bytes < req->nbytes) {
+       if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
                /* process next scatter list entry */
                cpg->eng_st = ENGINE_BUSY;
-               mv_process_current_q(0);
+               cpg->p.process(0);
        } else {
-               sg_miter_stop(&cpg->p.src_sg_it);
-               sg_miter_stop(&cpg->p.dst_sg_it);
-               mv_crypto_algo_completion();
+               cpg->p.complete();
                cpg->eng_st = ENGINE_IDLE;
-               req->base.complete(&req->base, 0);
+               local_bh_disable();
+               req->complete(req, 0);
+               local_bh_enable();
        }
 }
 
 static int count_sgs(struct scatterlist *sl, unsigned int total_bytes)
 {
        int i = 0;
-
-       do {
-               total_bytes -= sl[i].length;
-               i++;
-
-       } while (total_bytes > 0);
+       size_t cur_len;
+
+       while (1) {
+               cur_len = sl[i].length;
+               ++i;
+               if (total_bytes > cur_len)
+                       total_bytes -= cur_len;
+               else
+                       break;
+       }
 
        return i;
 }
 
-static void mv_enqueue_new_req(struct ablkcipher_request *req)
+static void mv_start_new_crypt_req(struct ablkcipher_request *req)
 {
+       struct req_progress *p = &cpg->p;
        int num_sgs;
 
-       cpg->cur_req = req;
-       memset(&cpg->p, 0, sizeof(struct req_progress));
+       cpg->cur_req = &req->base;
+       memset(p, 0, sizeof(struct req_progress));
+       p->hw_nbytes = req->nbytes;
+       p->complete = mv_crypto_algo_completion;
+       p->process = mv_process_current_q;
+       p->copy_back = 1;
 
        num_sgs = count_sgs(req->src, req->nbytes);
-       sg_miter_start(&cpg->p.src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
+       sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
 
        num_sgs = count_sgs(req->dst, req->nbytes);
-       sg_miter_start(&cpg->p.dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG);
+       sg_miter_start(&p->dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG);
+
        mv_process_current_q(1);
 }
 
+static void mv_start_new_hash_req(struct ahash_request *req)
+{
+       struct req_progress *p = &cpg->p;
+       struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
+       const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+       int num_sgs, hw_bytes, old_extra_bytes, rc;
+       cpg->cur_req = &req->base;
+       memset(p, 0, sizeof(struct req_progress));
+       hw_bytes = req->nbytes + ctx->extra_bytes;
+       old_extra_bytes = ctx->extra_bytes;
+
+       if (unlikely(ctx->extra_bytes)) {
+               memcpy(cpg->sram + SRAM_DATA_IN_START, ctx->buffer,
+                      ctx->extra_bytes);
+               p->crypt_len = ctx->extra_bytes;
+       }
+
+       memcpy(cpg->sram + SRAM_HMAC_IV_IN, tfm_ctx->ivs, sizeof(tfm_ctx->ivs));
+
+       if (unlikely(!ctx->first_hash)) {
+               writel(ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A);
+               writel(ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B);
+               writel(ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C);
+               writel(ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D);
+               writel(ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E);
+       }
+
+       ctx->extra_bytes = hw_bytes % SHA1_BLOCK_SIZE;
+       if (ctx->extra_bytes != 0
+           && (!ctx->last_chunk || ctx->count > MAX_HW_HASH_SIZE))
+               hw_bytes -= ctx->extra_bytes;
+       else
+               ctx->extra_bytes = 0;
+
+       num_sgs = count_sgs(req->src, req->nbytes);
+       sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
+
+       if (hw_bytes) {
+               p->hw_nbytes = hw_bytes;
+               p->complete = mv_hash_algo_completion;
+               p->process = mv_process_hash_current;
+
+               mv_process_hash_current(1);
+       } else {
+               copy_src_to_buf(p, ctx->buffer + old_extra_bytes,
+                               ctx->extra_bytes - old_extra_bytes);
+               sg_miter_stop(&p->src_sg_it);
+               if (ctx->last_chunk)
+                       rc = mv_hash_final_fallback(req);
+               else
+                       rc = 0;
+               cpg->eng_st = ENGINE_IDLE;
+               local_bh_disable();
+               req->base.complete(&req->base, rc);
+               local_bh_enable();
+       }
+}
+
 static int queue_manag(void *data)
 {
        cpg->eng_st = ENGINE_IDLE;
        do {
-               struct ablkcipher_request *req;
                struct crypto_async_request *async_req = NULL;
                struct crypto_async_request *backlog;
 
@@ -338,9 +600,18 @@ static int queue_manag(void *data)
                }
 
                if (async_req) {
-                       req = container_of(async_req,
-                                       struct ablkcipher_request, base);
-                       mv_enqueue_new_req(req);
+                       if (async_req->tfm->__crt_alg->cra_type !=
+                           &crypto_ahash_type) {
+                               struct ablkcipher_request *req =
+                                   container_of(async_req,
+                                                struct ablkcipher_request,
+                                                base);
+                               mv_start_new_crypt_req(req);
+                       } else {
+                               struct ahash_request *req =
+                                   ahash_request_cast(async_req);
+                               mv_start_new_hash_req(req);
+                       }
                        async_req = NULL;
                }
 
@@ -350,13 +621,13 @@ static int queue_manag(void *data)
        return 0;
 }
 
-static int mv_handle_req(struct ablkcipher_request *req)
+static int mv_handle_req(struct crypto_async_request *req)
 {
        unsigned long flags;
        int ret;
 
        spin_lock_irqsave(&cpg->lock, flags);
-       ret = ablkcipher_enqueue_request(&cpg->queue, req);
+       ret = crypto_enqueue_request(&cpg->queue, req);
        spin_unlock_irqrestore(&cpg->lock, flags);
        wake_up_process(cpg->queue_th);
        return ret;
@@ -369,7 +640,7 @@ static int mv_enc_aes_ecb(struct ablkcipher_request *req)
        req_ctx->op = COP_AES_ECB;
        req_ctx->decrypt = 0;
 
-       return mv_handle_req(req);
+       return mv_handle_req(&req->base);
 }
 
 static int mv_dec_aes_ecb(struct ablkcipher_request *req)
@@ -381,7 +652,7 @@ static int mv_dec_aes_ecb(struct ablkcipher_request *req)
        req_ctx->decrypt = 1;
 
        compute_aes_dec_key(ctx);
-       return mv_handle_req(req);
+       return mv_handle_req(&req->base);
 }
 
 static int mv_enc_aes_cbc(struct ablkcipher_request *req)
@@ -391,7 +662,7 @@ static int mv_enc_aes_cbc(struct ablkcipher_request *req)
        req_ctx->op = COP_AES_CBC;
        req_ctx->decrypt = 0;
 
-       return mv_handle_req(req);
+       return mv_handle_req(&req->base);
 }
 
 static int mv_dec_aes_cbc(struct ablkcipher_request *req)
@@ -403,7 +674,7 @@ static int mv_dec_aes_cbc(struct ablkcipher_request *req)
        req_ctx->decrypt = 1;
 
        compute_aes_dec_key(ctx);
-       return mv_handle_req(req);
+       return mv_handle_req(&req->base);
 }
 
 static int mv_cra_init(struct crypto_tfm *tfm)
@@ -412,6 +683,215 @@ static int mv_cra_init(struct crypto_tfm *tfm)
        return 0;
 }
 
+static void mv_init_hash_req_ctx(struct mv_req_hash_ctx *ctx, int op,
+                                int is_last, unsigned int req_len,
+                                int count_add)
+{
+       memset(ctx, 0, sizeof(*ctx));
+       ctx->op = op;
+       ctx->count = req_len;
+       ctx->first_hash = 1;
+       ctx->last_chunk = is_last;
+       ctx->count_add = count_add;
+}
+
+static void mv_update_hash_req_ctx(struct mv_req_hash_ctx *ctx, int is_last,
+                                  unsigned req_len)
+{
+       ctx->last_chunk = is_last;
+       ctx->count += req_len;
+}
+
+static int mv_hash_init(struct ahash_request *req)
+{
+       const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+       mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 0, 0,
+                            tfm_ctx->count_add);
+       return 0;
+}
+
+static int mv_hash_update(struct ahash_request *req)
+{
+       if (!req->nbytes)
+               return 0;
+
+       mv_update_hash_req_ctx(ahash_request_ctx(req), 0, req->nbytes);
+       return mv_handle_req(&req->base);
+}
+
+static int mv_hash_final(struct ahash_request *req)
+{
+       struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
+       /* dummy buffer of 4 bytes */
+       sg_init_one(&ctx->dummysg, ctx->buffer, 4);
+       /* I think I'm allowed to do that... */
+       ahash_request_set_crypt(req, &ctx->dummysg, req->result, 0);
+       mv_update_hash_req_ctx(ctx, 1, 0);
+       return mv_handle_req(&req->base);
+}
+
+static int mv_hash_finup(struct ahash_request *req)
+{
+       if (!req->nbytes)
+               return mv_hash_final(req);
+
+       mv_update_hash_req_ctx(ahash_request_ctx(req), 1, req->nbytes);
+       return mv_handle_req(&req->base);
+}
+
+static int mv_hash_digest(struct ahash_request *req)
+{
+       const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+       mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 1,
+                            req->nbytes, tfm_ctx->count_add);
+       return mv_handle_req(&req->base);
+}
+
+static void mv_hash_init_ivs(struct mv_tfm_hash_ctx *ctx, const void *istate,
+                            const void *ostate)
+{
+       const struct sha1_state *isha1_state = istate, *osha1_state = ostate;
+       int i;
+       for (i = 0; i < 5; i++) {
+               ctx->ivs[i] = cpu_to_be32(isha1_state->state[i]);
+               ctx->ivs[i + 5] = cpu_to_be32(osha1_state->state[i]);
+       }
+}
+
+static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key,
+                         unsigned int keylen)
+{
+       int rc;
+       struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+       int bs, ds, ss;
+
+       if (!ctx->base_hash)
+               return 0;
+
+       rc = crypto_shash_setkey(ctx->fallback, key, keylen);
+       if (rc)
+               return rc;
+
+       /* Can't see a way to extract the ipad/opad from the fallback tfm
+          so I'm basically copying code from the hmac module */
+       bs = crypto_shash_blocksize(ctx->base_hash);
+       ds = crypto_shash_digestsize(ctx->base_hash);
+       ss = crypto_shash_statesize(ctx->base_hash);
+
+       {
+               struct {
+                       struct shash_desc shash;
+                       char ctx[crypto_shash_descsize(ctx->base_hash)];
+               } desc;
+               unsigned int i;
+               char ipad[ss];
+               char opad[ss];
+
+               desc.shash.tfm = ctx->base_hash;
+               desc.shash.flags = crypto_shash_get_flags(ctx->base_hash) &
+                   CRYPTO_TFM_REQ_MAY_SLEEP;
+
+               if (keylen > bs) {
+                       int err;
+
+                       err =
+                           crypto_shash_digest(&desc.shash, key, keylen, ipad);
+                       if (err)
+                               return err;
+
+                       keylen = ds;
+               } else
+                       memcpy(ipad, key, keylen);
+
+               memset(ipad + keylen, 0, bs - keylen);
+               memcpy(opad, ipad, bs);
+
+               for (i = 0; i < bs; i++) {
+                       ipad[i] ^= 0x36;
+                       opad[i] ^= 0x5c;
+               }
+
+               rc = crypto_shash_init(&desc.shash) ? :
+                   crypto_shash_update(&desc.shash, ipad, bs) ? :
+                   crypto_shash_export(&desc.shash, ipad) ? :
+                   crypto_shash_init(&desc.shash) ? :
+                   crypto_shash_update(&desc.shash, opad, bs) ? :
+                   crypto_shash_export(&desc.shash, opad);
+
+               if (rc == 0)
+                       mv_hash_init_ivs(ctx, ipad, opad);
+
+               return rc;
+       }
+}
+
+static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name,
+                           enum hash_op op, int count_add)
+{
+       const char *fallback_driver_name = tfm->__crt_alg->cra_name;
+       struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_shash *fallback_tfm = NULL;
+       struct crypto_shash *base_hash = NULL;
+       int err = -ENOMEM;
+
+       ctx->op = op;
+       ctx->count_add = count_add;
+
+       /* Allocate a fallback and abort if it failed. */
+       fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
+                                         CRYPTO_ALG_NEED_FALLBACK);
+       if (IS_ERR(fallback_tfm)) {
+               printk(KERN_WARNING MV_CESA
+                      "Fallback driver '%s' could not be loaded!\n",
+                      fallback_driver_name);
+               err = PTR_ERR(fallback_tfm);
+               goto out;
+       }
+       ctx->fallback = fallback_tfm;
+
+       if (base_hash_name) {
+               /* Allocate a hash to compute the ipad/opad of hmac. */
+               base_hash = crypto_alloc_shash(base_hash_name, 0,
+                                              CRYPTO_ALG_NEED_FALLBACK);
+               if (IS_ERR(base_hash)) {
+                       printk(KERN_WARNING MV_CESA
+                              "Base driver '%s' could not be loaded!\n",
+                              base_hash_name);
+                       err = PTR_ERR(fallback_tfm);
+                       goto err_bad_base;
+               }
+       }
+       ctx->base_hash = base_hash;
+
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct mv_req_hash_ctx) +
+                                crypto_shash_descsize(ctx->fallback));
+       return 0;
+err_bad_base:
+       crypto_free_shash(fallback_tfm);
+out:
+       return err;
+}
+
+static void mv_cra_hash_exit(struct crypto_tfm *tfm)
+{
+       struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       crypto_free_shash(ctx->fallback);
+       if (ctx->base_hash)
+               crypto_free_shash(ctx->base_hash);
+}
+
+static int mv_cra_hash_sha1_init(struct crypto_tfm *tfm)
+{
+       return mv_cra_hash_init(tfm, NULL, COP_SHA1, 0);
+}
+
+static int mv_cra_hash_hmac_sha1_init(struct crypto_tfm *tfm)
+{
+       return mv_cra_hash_init(tfm, "sha1", COP_HMAC_SHA1, SHA1_BLOCK_SIZE);
+}
+
 irqreturn_t crypto_int(int irq, void *priv)
 {
        u32 val;
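
Once the ahash algorithms defined below are registered, callers reach the engine through the generic names ("sha1", "hmac(sha1)") exactly as with any other async hash provider. A minimal, hedged usage sketch; my_done_cb, my_ctx, sgl, nbytes and digest_out are placeholders, and -EINPROGRESS completion plus error unwinding are omitted:

        struct crypto_ahash *tfm = crypto_alloc_ahash("sha1", 0, 0);
        struct ahash_request *req;
        int err;

        if (IS_ERR(tfm))
                return PTR_ERR(tfm);
        req = ahash_request_alloc(tfm, GFP_KERNEL);
        if (!req)
                return -ENOMEM;
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   my_done_cb, my_ctx);
        ahash_request_set_crypt(req, sgl, digest_out, nbytes);
        err = crypto_ahash_digest(req);         /* may return -EINPROGRESS */

One small oddity worth flagging in the hunk above: the base_hash error path reports PTR_ERR(fallback_tfm) rather than PTR_ERR(base_hash), which looks like a copy-and-paste slip.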
@@ -474,6 +954,53 @@ struct crypto_alg mv_aes_alg_cbc = {
        },
 };
 
+struct ahash_alg mv_sha1_alg = {
+       .init = mv_hash_init,
+       .update = mv_hash_update,
+       .final = mv_hash_final,
+       .finup = mv_hash_finup,
+       .digest = mv_hash_digest,
+       .halg = {
+                .digestsize = SHA1_DIGEST_SIZE,
+                .base = {
+                         .cra_name = "sha1",
+                         .cra_driver_name = "mv-sha1",
+                         .cra_priority = 300,
+                         .cra_flags =
+                         CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                         .cra_blocksize = SHA1_BLOCK_SIZE,
+                         .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx),
+                         .cra_init = mv_cra_hash_sha1_init,
+                         .cra_exit = mv_cra_hash_exit,
+                         .cra_module = THIS_MODULE,
+                         }
+                }
+};
+
+struct ahash_alg mv_hmac_sha1_alg = {
+       .init = mv_hash_init,
+       .update = mv_hash_update,
+       .final = mv_hash_final,
+       .finup = mv_hash_finup,
+       .digest = mv_hash_digest,
+       .setkey = mv_hash_setkey,
+       .halg = {
+                .digestsize = SHA1_DIGEST_SIZE,
+                .base = {
+                         .cra_name = "hmac(sha1)",
+                         .cra_driver_name = "mv-hmac-sha1",
+                         .cra_priority = 300,
+                         .cra_flags =
+                         CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+                         .cra_blocksize = SHA1_BLOCK_SIZE,
+                         .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx),
+                         .cra_init = mv_cra_hash_hmac_sha1_init,
+                         .cra_exit = mv_cra_hash_exit,
+                         .cra_module = THIS_MODULE,
+                         }
+                }
+};
+
 static int mv_probe(struct platform_device *pdev)
 {
        struct crypto_priv *cp;
@@ -482,7 +1009,7 @@ static int mv_probe(struct platform_device *pdev)
        int ret;
 
        if (cpg) {
-               printk(KERN_ERR "Second crypto dev?\n");
+               printk(KERN_ERR MV_CESA "Second crypto dev?\n");
                return -EEXIST;
        }
 
@@ -496,7 +1023,7 @@ static int mv_probe(struct platform_device *pdev)
 
        spin_lock_init(&cp->lock);
        crypto_init_queue(&cp->queue, 50);
-       cp->reg = ioremap(res->start, res->end - res->start + 1);
+       cp->reg = ioremap(res->start, resource_size(res));
        if (!cp->reg) {
                ret = -ENOMEM;
                goto err;
@@ -507,7 +1034,7 @@ static int mv_probe(struct platform_device *pdev)
                ret = -ENXIO;
                goto err_unmap_reg;
        }
-       cp->sram_size = res->end - res->start + 1;
+       cp->sram_size = resource_size(res);
        cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE;
        cp->sram = ioremap(res->start, cp->sram_size);
        if (!cp->sram) {
@@ -546,6 +1073,21 @@ static int mv_probe(struct platform_device *pdev)
        ret = crypto_register_alg(&mv_aes_alg_cbc);
        if (ret)
                goto err_unreg_ecb;
+
+       ret = crypto_register_ahash(&mv_sha1_alg);
+       if (ret == 0)
+               cpg->has_sha1 = 1;
+       else
+               printk(KERN_WARNING MV_CESA "Could not register sha1 driver\n");
+
+       ret = crypto_register_ahash(&mv_hmac_sha1_alg);
+       if (ret == 0) {
+               cpg->has_hmac_sha1 = 1;
+       } else {
+               printk(KERN_WARNING MV_CESA
+                      "Could not register hmac-sha1 driver\n");
+       }
+
        return 0;
 err_unreg_ecb:
        crypto_unregister_alg(&mv_aes_alg_ecb);
@@ -570,6 +1112,10 @@ static int mv_remove(struct platform_device *pdev)
 
        crypto_unregister_alg(&mv_aes_alg_ecb);
        crypto_unregister_alg(&mv_aes_alg_cbc);
+       if (cp->has_sha1)
+               crypto_unregister_ahash(&mv_sha1_alg);
+       if (cp->has_hmac_sha1)
+               crypto_unregister_ahash(&mv_hmac_sha1_alg);
        kthread_stop(cp->queue_th);
        free_irq(cp->irq, cp);
        memset(cp->sram, 0, cp->sram_size);
index c3e25d3..08fcb11 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef __MV_CRYPTO_H__
 
 #define DIGEST_INITIAL_VAL_A   0xdd00
+#define DIGEST_INITIAL_VAL_B   0xdd04
+#define DIGEST_INITIAL_VAL_C   0xdd08
+#define DIGEST_INITIAL_VAL_D   0xdd0c
+#define DIGEST_INITIAL_VAL_E   0xdd10
 #define DES_CMD_REG            0xdd58
 
 #define SEC_ACCEL_CMD          0xde00
@@ -70,6 +74,10 @@ struct sec_accel_config {
 #define CFG_AES_LEN_128                (0 << 24)
 #define CFG_AES_LEN_192                (1 << 24)
 #define CFG_AES_LEN_256                (2 << 24)
+#define CFG_NOT_FRAG           (0 << 30)
+#define CFG_FIRST_FRAG         (1 << 30)
+#define CFG_LAST_FRAG          (2 << 30)
+#define CFG_MID_FRAG           (3 << 30)
 
        u32 enc_p;
 #define ENC_P_SRC(x)           (x)
@@ -90,7 +98,11 @@ struct sec_accel_config {
 #define MAC_SRC_TOTAL_LEN(x)   ((x) << 16)
 
        u32 mac_digest;
+#define MAC_DIGEST_P(x)        (x)
+#define MAC_FRAG_LEN(x)        ((x) << 16)
        u32 mac_iv;
+#define MAC_INNER_IV_P(x)      (x)
+#define MAC_OUTER_IV_P(x)      ((x) << 16)
 }__attribute__ ((packed));
        /*
         * /-----------\ 0
@@ -101,19 +113,37 @@ struct sec_accel_config {
         * |  IV   IN  |        4 * 4
         * |-----------| 0x40 (inplace)
         * |  IV BUF   |        4 * 4
-        * |-----------| 0x50
+        * |-----------| 0x80
         * |  DATA IN  |        16 * x (max ->max_req_size)
-        * |-----------| 0x50 (inplace operation)
+        * |-----------| 0x80 (inplace operation)
         * |  DATA OUT |        16 * x (max ->max_req_size)
         * \-----------/ SRAM size
         */
+
+       /* Hashing memory map:
+        * /-----------\ 0
+        * | ACCEL CFG |        4 * 8
+        * |-----------| 0x20
+        * | Inner IV  |        5 * 4
+        * |-----------| 0x34
+        * | Outer IV  |        5 * 4
+        * |-----------| 0x48
+        * | Output BUF|        5 * 4
+        * |-----------| 0x80
+        * |  DATA IN  |        64 * x (max ->max_req_size)
+        * \-----------/ SRAM size
+        */
 #define SRAM_CONFIG            0x00
 #define SRAM_DATA_KEY_P                0x20
 #define SRAM_DATA_IV           0x40
 #define SRAM_DATA_IV_BUF       0x40
-#define SRAM_DATA_IN_START     0x50
-#define SRAM_DATA_OUT_START    0x50
+#define SRAM_DATA_IN_START     0x80
+#define SRAM_DATA_OUT_START    0x80
+
+#define SRAM_HMAC_IV_IN                0x20
+#define SRAM_HMAC_IV_OUT       0x34
+#define SRAM_DIGEST_BUF                0x48
 
-#define SRAM_CFG_SPACE         0x50
+#define SRAM_CFG_SPACE         0x80
 
 #endif
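
The layout comments above explain why SRAM_CFG_SPACE grows from 0x50 to 0x80: the hashing map needs two 20-byte SHA-1 IVs and a 20-byte digest buffer after the 32-byte config area (0x20 + 3 * 0x14 = 0x5c), and DATA IN is pushed to the next convenient boundary at 0x80 for both the crypt and hash layouts. A hedged, purely illustrative compile-time check that could sit inside any function (e.g. the probe routine), given that SHA1_DIGEST_SIZE is 20:

        BUILD_BUG_ON(SRAM_HMAC_IV_OUT != SRAM_HMAC_IV_IN + SHA1_DIGEST_SIZE);
        BUILD_BUG_ON(SRAM_DIGEST_BUF != SRAM_HMAC_IV_OUT + SHA1_DIGEST_SIZE);
        BUILD_BUG_ON(SRAM_DIGEST_BUF + SHA1_DIGEST_SIZE > SRAM_DATA_IN_START);
        BUILD_BUG_ON(SRAM_CFG_SPACE != SRAM_DATA_IN_START);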
diff --git a/drivers/crypto/n2_asm.S b/drivers/crypto/n2_asm.S
new file mode 100644 (file)
index 0000000..f7c7937
--- /dev/null
@@ -0,0 +1,95 @@
+/* n2_asm.S: Hypervisor calls for NCS support.
+ *
+ * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/linkage.h>
+#include <asm/hypervisor.h>
+#include "n2_core.h"
+
+       /* o0: queue type
+        * o1: RA of queue
+        * o2: num entries in queue
+        * o3: address of queue handle return
+        */
+ENTRY(sun4v_ncs_qconf)
+       mov     HV_FAST_NCS_QCONF, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%o3]
+       retl
+        nop
+ENDPROC(sun4v_ncs_qconf)
+
+       /* %o0: queue handle
+        * %o1: address of queue type return
+        * %o2: address of queue base address return
+        * %o3: address of queue num entries return
+        */
+ENTRY(sun4v_ncs_qinfo)
+       mov     %o1, %g1
+       mov     %o2, %g2
+       mov     %o3, %g3
+       mov     HV_FAST_NCS_QINFO, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%g1]
+       stx     %o2, [%g2]
+       stx     %o3, [%g3]
+       retl
+        nop
+ENDPROC(sun4v_ncs_qinfo)
+
+       /* %o0: queue handle
+        * %o1: address of head offset return
+        */
+ENTRY(sun4v_ncs_gethead)
+       mov     %o1, %o2
+       mov     HV_FAST_NCS_GETHEAD, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%o2]
+       retl
+        nop
+ENDPROC(sun4v_ncs_gethead)
+
+       /* %o0: queue handle
+        * %o1: address of tail offset return
+        */
+ENTRY(sun4v_ncs_gettail)
+       mov     %o1, %o2
+       mov     HV_FAST_NCS_GETTAIL, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%o2]
+       retl
+        nop
+ENDPROC(sun4v_ncs_gettail)
+
+       /* %o0: queue handle
+        * %o1: new tail offset
+        */
+ENTRY(sun4v_ncs_settail)
+       mov     HV_FAST_NCS_SETTAIL, %o5
+       ta      HV_FAST_TRAP
+       retl
+        nop
+ENDPROC(sun4v_ncs_settail)
+
+       /* %o0: queue handle
+        * %o1: address of devino return
+        */
+ENTRY(sun4v_ncs_qhandle_to_devino)
+       mov     %o1, %o2
+       mov     HV_FAST_NCS_QHANDLE_TO_DEVINO, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%o2]
+       retl
+        nop
+ENDPROC(sun4v_ncs_qhandle_to_devino)
+
+       /* %o0: queue handle
+        * %o1: new head offset
+        */
+ENTRY(sun4v_ncs_sethead_marker)
+       mov     HV_FAST_NCS_SETHEAD_MARKER, %o5
+       ta      HV_FAST_TRAP
+       retl
+        nop
+ENDPROC(sun4v_ncs_sethead_marker)
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
new file mode 100644 (file)
index 0000000..8566be8
--- /dev/null
@@ -0,0 +1,2083 @@
+/* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support.
+ *
+ * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/crypto.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+
+#include "n2_core.h"
+
+#define DRV_MODULE_NAME                "n2_crypto"
+#define DRV_MODULE_VERSION     "0.1"
+#define DRV_MODULE_RELDATE     "April 29, 2010"
+
+static char version[] __devinitdata =
+       DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Niagara2 Crypto driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+#define N2_CRA_PRIORITY                300
+
+static DEFINE_MUTEX(spu_lock);
+
+struct spu_queue {
+       cpumask_t               sharing;
+       unsigned long           qhandle;
+
+       spinlock_t              lock;
+       u8                      q_type;
+       void                    *q;
+       unsigned long           head;
+       unsigned long           tail;
+       struct list_head        jobs;
+
+       unsigned long           devino;
+
+       char                    irq_name[32];
+       unsigned int            irq;
+
+       struct list_head        list;
+};
+
+static struct spu_queue **cpu_to_cwq;
+static struct spu_queue **cpu_to_mau;
+
+static unsigned long spu_next_offset(struct spu_queue *q, unsigned long off)
+{
+       if (q->q_type == HV_NCS_QTYPE_MAU) {
+               off += MAU_ENTRY_SIZE;
+               if (off == (MAU_ENTRY_SIZE * MAU_NUM_ENTRIES))
+                       off = 0;
+       } else {
+               off += CWQ_ENTRY_SIZE;
+               if (off == (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES))
+                       off = 0;
+       }
+       return off;
+}
+
+struct n2_request_common {
+       struct list_head        entry;
+       unsigned int            offset;
+};
+#define OFFSET_NOT_RUNNING     (~(unsigned int)0)
+
+/* An async job request records the final tail value it used in
+ * n2_request_common->offset; test whether that offset falls in the
+ * range (old_head, new_head], i.e. excluding old_head and including
+ * new_head.
+ */
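+/* For example (illustrative offsets only): if old_head == 0x40 and
+ * new_head == 0x10, the head has wrapped past the end of the queue,
+ * so offsets 0x48 and 0x08 count as finished while 0x18 does not.
+ */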
+static inline bool job_finished(struct spu_queue *q, unsigned int offset,
+                               unsigned long old_head, unsigned long new_head)
+{
+       if (old_head <= new_head) {
+               if (offset > old_head && offset <= new_head)
+                       return true;
+       } else {
+               if (offset > old_head || offset <= new_head)
+                       return true;
+       }
+       return false;
+}
+
+/* When the HEAD marker is unequal to the actual HEAD, we get
+ * a virtual device INO interrupt.  We should process the
+ * completed CWQ entries and adjust the HEAD marker to clear
+ * the IRQ.
+ */
+static irqreturn_t cwq_intr(int irq, void *dev_id)
+{
+       unsigned long off, new_head, hv_ret;
+       struct spu_queue *q = dev_id;
+
+       pr_err("CPU[%d]: Got CWQ interrupt for qhdl[%lx]\n",
+              smp_processor_id(), q->qhandle);
+
+       spin_lock(&q->lock);
+
+       hv_ret = sun4v_ncs_gethead(q->qhandle, &new_head);
+
+       pr_err("CPU[%d]: CWQ gethead[%lx] hv_ret[%lu]\n",
+              smp_processor_id(), new_head, hv_ret);
+
+       for (off = q->head; off != new_head; off = spu_next_offset(q, off)) {
+               /* XXX ... XXX */
+       }
+
+       hv_ret = sun4v_ncs_sethead_marker(q->qhandle, new_head);
+       if (hv_ret == HV_EOK)
+               q->head = new_head;
+
+       spin_unlock(&q->lock);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t mau_intr(int irq, void *dev_id)
+{
+       struct spu_queue *q = dev_id;
+       unsigned long head, hv_ret;
+
+       spin_lock(&q->lock);
+
+       pr_err("CPU[%d]: Got MAU interrupt for qhdl[%lx]\n",
+              smp_processor_id(), q->qhandle);
+
+       hv_ret = sun4v_ncs_gethead(q->qhandle, &head);
+
+       pr_err("CPU[%d]: MAU gethead[%lx] hv_ret[%lu]\n",
+              smp_processor_id(), head, hv_ret);
+
+       sun4v_ncs_sethead_marker(q->qhandle, head);
+
+       spin_unlock(&q->lock);
+
+       return IRQ_HANDLED;
+}
+
+static void *spu_queue_next(struct spu_queue *q, void *cur)
+{
+       return q->q + spu_next_offset(q, cur - q->q);
+}
+
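+/* Ring accounting sanity check (illustrative): when head == tail the
+ * queue is empty and spu_queue_num_free() below reports
+ * CWQ_NUM_ENTRIES - 1 free slots; when the tail sits one entry behind
+ * the head the queue is full and 0 slots are reported free.
+ */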
+static int spu_queue_num_free(struct spu_queue *q)
+{
+       unsigned long head = q->head;
+       unsigned long tail = q->tail;
+       unsigned long end = (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES);
+       unsigned long diff;
+
+       if (head > tail)
+               diff = head - tail;
+       else
+               diff = (end - tail) + head;
+
+       return (diff / CWQ_ENTRY_SIZE) - 1;
+}
+
+static void *spu_queue_alloc(struct spu_queue *q, int num_entries)
+{
+       int avail = spu_queue_num_free(q);
+
+       if (avail >= num_entries)
+               return q->q + q->tail;
+
+       return NULL;
+}
+
+static unsigned long spu_queue_submit(struct spu_queue *q, void *last)
+{
+       unsigned long hv_ret, new_tail;
+
+       new_tail = spu_next_offset(q, last - q->q);
+
+       hv_ret = sun4v_ncs_settail(q->qhandle, new_tail);
+       if (hv_ret == HV_EOK)
+               q->tail = new_tail;
+       return hv_ret;
+}
+
+static u64 control_word_base(unsigned int len, unsigned int hmac_key_len,
+                            int enc_type, int auth_type,
+                            unsigned int hash_len,
+                            bool sfas, bool sob, bool eob, bool encrypt,
+                            int opcode)
+{
+       u64 word = (len - 1) & CONTROL_LEN;
+
+       word |= ((u64) opcode << CONTROL_OPCODE_SHIFT);
+       word |= ((u64) enc_type << CONTROL_ENC_TYPE_SHIFT);
+       word |= ((u64) auth_type << CONTROL_AUTH_TYPE_SHIFT);
+       if (sfas)
+               word |= CONTROL_STORE_FINAL_AUTH_STATE;
+       if (sob)
+               word |= CONTROL_START_OF_BLOCK;
+       if (eob)
+               word |= CONTROL_END_OF_BLOCK;
+       if (encrypt)
+               word |= CONTROL_ENCRYPT;
+       if (hmac_key_len)
+               word |= ((u64) (hmac_key_len - 1)) << CONTROL_HMAC_KEY_LEN_SHIFT;
+       if (hash_len)
+               word |= ((u64) (hash_len - 1)) << CONTROL_HASH_LEN_SHIFT;
+
+       return word;
+}
+
+#if 0
+static inline bool n2_should_run_async(struct spu_queue *qp, int this_len)
+{
+       if (this_len >= 64 ||
+           qp->head != qp->tail)
+               return true;
+       return false;
+}
+#endif
+
+struct n2_base_ctx {
+       struct list_head                list;
+};
+
+static void n2_base_ctx_init(struct n2_base_ctx *ctx)
+{
+       INIT_LIST_HEAD(&ctx->list);
+}
+
+struct n2_hash_ctx {
+       struct n2_base_ctx              base;
+
+       struct crypto_ahash             *fallback;
+
+       /* These next three members must match the layout created by
+        * crypto_init_shash_ops_async.  This allows us to properly
+        * plumb requests we can't do in hardware down to the fallback
+        * operation, providing all of the data structures and layouts
+        * expected by those paths.
+        */
+       struct ahash_request            fallback_req;
+       struct shash_desc               fallback_desc;
+       union {
+               struct md5_state        md5;
+               struct sha1_state       sha1;
+               struct sha256_state     sha256;
+       } u;
+
+       unsigned char                   hash_key[64];
+       unsigned char                   keyed_zero_hash[32];
+};
+
+static int n2_hash_async_init(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+       ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback);
+       ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       return crypto_ahash_init(&ctx->fallback_req);
+}
+
+static int n2_hash_async_update(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+       ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback);
+       ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+       ctx->fallback_req.nbytes = req->nbytes;
+       ctx->fallback_req.src = req->src;
+
+       return crypto_ahash_update(&ctx->fallback_req);
+}
+
+static int n2_hash_async_final(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+       ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback);
+       ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+       ctx->fallback_req.result = req->result;
+
+       return crypto_ahash_final(&ctx->fallback_req);
+}
+
+static int n2_hash_async_finup(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+       ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback);
+       ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+       ctx->fallback_req.nbytes = req->nbytes;
+       ctx->fallback_req.src = req->src;
+       ctx->fallback_req.result = req->result;
+
+       return crypto_ahash_finup(&ctx->fallback_req);
+}
+
+static int n2_hash_cra_init(struct crypto_tfm *tfm)
+{
+       const char *fallback_driver_name = tfm->__crt_alg->cra_name;
+       struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+       struct crypto_ahash *fallback_tfm;
+       int err;
+
+       fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
+                                         CRYPTO_ALG_NEED_FALLBACK);
+       if (IS_ERR(fallback_tfm)) {
+               pr_warning("Fallback driver '%s' could not be loaded!\n",
+                          fallback_driver_name);
+               err = PTR_ERR(fallback_tfm);
+               goto out;
+       }
+
+       ctx->fallback = fallback_tfm;
+       return 0;
+
+out:
+       return err;
+}
+
+static void n2_hash_cra_exit(struct crypto_tfm *tfm)
+{
+       struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+
+       crypto_free_ahash(ctx->fallback);
+}
+
+static unsigned long wait_for_tail(struct spu_queue *qp)
+{
+       unsigned long head, hv_ret;
+
+       do {
+               hv_ret = sun4v_ncs_gethead(qp->qhandle, &head);
+               if (hv_ret != HV_EOK) {
+                       pr_err("Hypervisor error on gethead\n");
+                       break;
+               }
+               if (head == qp->tail) {
+                       qp->head = head;
+                       break;
+               }
+       } while (1);
+       return hv_ret;
+}
+
+static unsigned long submit_and_wait_for_tail(struct spu_queue *qp,
+                                             struct cwq_initial_entry *ent)
+{
+       unsigned long hv_ret = spu_queue_submit(qp, ent);
+
+       if (hv_ret == HV_EOK)
+               hv_ret = wait_for_tail(qp);
+
+       return hv_ret;
+}
+
+static int n2_hash_async_digest(struct ahash_request *req,
+                               unsigned int auth_type, unsigned int digest_size,
+                               unsigned int result_size, void *hash_loc)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct cwq_initial_entry *ent;
+       struct crypto_hash_walk walk;
+       struct spu_queue *qp;
+       unsigned long flags;
+       int err = -ENODEV;
+       int nbytes, cpu;
+
+       /* The total effective length of the operation may not
+        * exceed 2^16.
+        */
+       if (unlikely(req->nbytes > (1 << 16))) {
+               ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback);
+               ctx->fallback_req.base.flags =
+                       req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+               ctx->fallback_req.nbytes = req->nbytes;
+               ctx->fallback_req.src = req->src;
+               ctx->fallback_req.result = req->result;
+
+               return crypto_ahash_digest(&ctx->fallback_req);
+       }
+
+       n2_base_ctx_init(&ctx->base);
+
+       nbytes = crypto_hash_walk_first(req, &walk);
+
+       cpu = get_cpu();
+       qp = cpu_to_cwq[cpu];
+       if (!qp)
+               goto out;
+
+       spin_lock_irqsave(&qp->lock, flags);
+
+       /* XXX can do better, improve this later by doing a by-hand scatterlist
+        * XXX walk, etc.
+        */
+       ent = qp->q + qp->tail;
+
+       ent->control = control_word_base(nbytes, 0, 0,
+                                        auth_type, digest_size,
+                                        false, true, false, false,
+                                        OPCODE_INPLACE_BIT |
+                                        OPCODE_AUTH_MAC);
+       ent->src_addr = __pa(walk.data);
+       ent->auth_key_addr = 0UL;
+       ent->auth_iv_addr = __pa(hash_loc);
+       ent->final_auth_state_addr = 0UL;
+       ent->enc_key_addr = 0UL;
+       ent->enc_iv_addr = 0UL;
+       ent->dest_addr = __pa(hash_loc);
+
+       nbytes = crypto_hash_walk_done(&walk, 0);
+       while (nbytes > 0) {
+               ent = spu_queue_next(qp, ent);
+
+               ent->control = (nbytes - 1);
+               ent->src_addr = __pa(walk.data);
+               ent->auth_key_addr = 0UL;
+               ent->auth_iv_addr = 0UL;
+               ent->final_auth_state_addr = 0UL;
+               ent->enc_key_addr = 0UL;
+               ent->enc_iv_addr = 0UL;
+               ent->dest_addr = 0UL;
+
+               nbytes = crypto_hash_walk_done(&walk, 0);
+       }
+       ent->control |= CONTROL_END_OF_BLOCK;
+
+       if (submit_and_wait_for_tail(qp, ent) != HV_EOK)
+               err = -EINVAL;
+       else
+               err = 0;
+
+       spin_unlock_irqrestore(&qp->lock, flags);
+
+       if (!err)
+               memcpy(req->result, hash_loc, result_size);
+out:
+       put_cpu();
+
+       return err;
+}
+
+static int n2_md5_async_digest(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct md5_state *m = &ctx->u.md5;
+
+       if (unlikely(req->nbytes == 0)) {
+               static const char md5_zero[MD5_DIGEST_SIZE] = {
+                       0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
+                       0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
+               };
+
+               memcpy(req->result, md5_zero, MD5_DIGEST_SIZE);
+               return 0;
+       }
+       m->hash[0] = cpu_to_le32(0x67452301);
+       m->hash[1] = cpu_to_le32(0xefcdab89);
+       m->hash[2] = cpu_to_le32(0x98badcfe);
+       m->hash[3] = cpu_to_le32(0x10325476);
+
+       return n2_hash_async_digest(req, AUTH_TYPE_MD5,
+                                   MD5_DIGEST_SIZE, MD5_DIGEST_SIZE,
+                                   m->hash);
+}
+
+static int n2_sha1_async_digest(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sha1_state *s = &ctx->u.sha1;
+
+       if (unlikely(req->nbytes == 0)) {
+               static const char sha1_zero[SHA1_DIGEST_SIZE] = {
+                       0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,
+                       0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8,
+                       0x07, 0x09
+               };
+
+               memcpy(req->result, sha1_zero, SHA1_DIGEST_SIZE);
+               return 0;
+       }
+       s->state[0] = SHA1_H0;
+       s->state[1] = SHA1_H1;
+       s->state[2] = SHA1_H2;
+       s->state[3] = SHA1_H3;
+       s->state[4] = SHA1_H4;
+
+       return n2_hash_async_digest(req, AUTH_TYPE_SHA1,
+                                   SHA1_DIGEST_SIZE, SHA1_DIGEST_SIZE,
+                                   s->state);
+}
+
+static int n2_sha256_async_digest(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sha256_state *s = &ctx->u.sha256;
+
+       if (req->nbytes == 0) {
+               static const char sha256_zero[SHA256_DIGEST_SIZE] = {
+                       0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a,
+                       0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae,
+                       0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99,
+                       0x1b, 0x78, 0x52, 0xb8, 0x55
+               };
+
+               memcpy(req->result, sha256_zero, SHA256_DIGEST_SIZE);
+               return 0;
+       }
+       s->state[0] = SHA256_H0;
+       s->state[1] = SHA256_H1;
+       s->state[2] = SHA256_H2;
+       s->state[3] = SHA256_H3;
+       s->state[4] = SHA256_H4;
+       s->state[5] = SHA256_H5;
+       s->state[6] = SHA256_H6;
+       s->state[7] = SHA256_H7;
+
+       return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
+                                   SHA256_DIGEST_SIZE, SHA256_DIGEST_SIZE,
+                                   s->state);
+}
+
+static int n2_sha224_async_digest(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct sha256_state *s = &ctx->u.sha256;
+
+       if (req->nbytes == 0) {
+               static const char sha224_zero[SHA224_DIGEST_SIZE] = {
+                       0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47,
+                       0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2,
+                       0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4,
+                       0x2f
+               };
+
+               memcpy(req->result, sha224_zero, SHA224_DIGEST_SIZE);
+               return 0;
+       }
+       s->state[0] = SHA224_H0;
+       s->state[1] = SHA224_H1;
+       s->state[2] = SHA224_H2;
+       s->state[3] = SHA224_H3;
+       s->state[4] = SHA224_H4;
+       s->state[5] = SHA224_H5;
+       s->state[6] = SHA224_H6;
+       s->state[7] = SHA224_H7;
+
+       return n2_hash_async_digest(req, AUTH_TYPE_SHA256,
+                                   SHA256_DIGEST_SIZE, SHA224_DIGEST_SIZE,
+                                   s->state);
+}
+
+struct n2_cipher_context {
+       int                     key_len;
+       int                     enc_type;
+       union {
+               u8              aes[AES_MAX_KEY_SIZE];
+               u8              des[DES_KEY_SIZE];
+               u8              des3[3 * DES_KEY_SIZE];
+               u8              arc4[258]; /* S-box, X, Y */
+       } key;
+};
+
+#define N2_CHUNK_ARR_LEN       16
+
+struct n2_crypto_chunk {
+       struct list_head        entry;
+       unsigned long           iv_paddr : 44;
+       unsigned long           arr_len : 20;
+       unsigned long           dest_paddr;
+       unsigned long           dest_final;
+       struct {
+               unsigned long   src_paddr : 44;
+               unsigned long   src_len : 20;
+       } arr[N2_CHUNK_ARR_LEN];
+};
+
+struct n2_request_context {
+       struct ablkcipher_walk  walk;
+       struct list_head        chunk_list;
+       struct n2_crypto_chunk  chunk;
+       u8                      temp_iv[16];
+};
+
+/* The SPU allows some level of flexibility for partial cipher blocks
+ * being specified in a descriptor.
+ *
+ * It merely requires that every descriptor's length field is at least
+ * as large as the cipher block size.  This means that a cipher block
+ * can span at most 2 descriptors.  However, this does not allow a
+ * partial block to span into the final descriptor as that would
+ * violate the rule (since every descriptor's length must be at least
+ * the block size).  So, for example, assuming an 8 byte block size:
+ *
+ *     0xe --> 0xa --> 0x8
+ *
+ * is a valid length sequence, whereas:
+ *
+ *     0xe --> 0xb --> 0x7
+ *
+ * is not a valid sequence.
+ */
+
+struct n2_cipher_alg {
+       struct list_head        entry;
+       u8                      enc_type;
+       struct crypto_alg       alg;
+};
+
+static inline struct n2_cipher_alg *n2_cipher_alg(struct crypto_tfm *tfm)
+{
+       struct crypto_alg *alg = tfm->__crt_alg;
+
+       return container_of(alg, struct n2_cipher_alg, alg);
+}
+
+struct n2_cipher_request_context {
+       struct ablkcipher_walk  walk;
+};
+
+static int n2_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+                        unsigned int keylen)
+{
+       struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+       struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+       struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+
+       ctx->enc_type = (n2alg->enc_type & ENC_TYPE_CHAINING_MASK);
+
+       switch (keylen) {
+       case AES_KEYSIZE_128:
+               ctx->enc_type |= ENC_TYPE_ALG_AES128;
+               break;
+       case AES_KEYSIZE_192:
+               ctx->enc_type |= ENC_TYPE_ALG_AES192;
+               break;
+       case AES_KEYSIZE_256:
+               ctx->enc_type |= ENC_TYPE_ALG_AES256;
+               break;
+       default:
+               crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       ctx->key_len = keylen;
+       memcpy(ctx->key.aes, key, keylen);
+       return 0;
+}
+
+static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+                        unsigned int keylen)
+{
+       struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+       struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+       struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+       u32 tmp[DES_EXPKEY_WORDS];
+       int err;
+
+       ctx->enc_type = n2alg->enc_type;
+
+       if (keylen != DES_KEY_SIZE) {
+               crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       err = des_ekey(tmp, key);
+       if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+               tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+               return -EINVAL;
+       }
+
+       ctx->key_len = keylen;
+       memcpy(ctx->key.des, key, keylen);
+       return 0;
+}
+
+static int n2_3des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+                         unsigned int keylen)
+{
+       struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+       struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+       struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+
+       ctx->enc_type = n2alg->enc_type;
+
+       if (keylen != (3 * DES_KEY_SIZE)) {
+               crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       ctx->key_len = keylen;
+       memcpy(ctx->key.des3, key, keylen);
+       return 0;
+}
+
+static int n2_arc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+                         unsigned int keylen)
+{
+       struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+       struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+       struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
+       u8 *s = ctx->key.arc4;
+       u8 *x = s + 256;
+       u8 *y = x + 1;
+       int i, j, k;
+
+       ctx->enc_type = n2alg->enc_type;
+
+       j = k = 0;
+       *x = 0;
+       *y = 0;
+       for (i = 0; i < 256; i++)
+               s[i] = i;
+       for (i = 0; i < 256; i++) {
+               u8 a = s[i];
+               j = (j + key[k] + a) & 0xff;
+               s[i] = s[j];
+               s[j] = a;
+               if (++k >= keylen)
+                       k = 0;
+       }
+
+       return 0;
+}
+
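+/* cipher_descriptor_len() rounds nbytes down to a whole number of
+ * cipher blocks and caps the result at the 2^16 byte descriptor
+ * limit.  For example (illustrative values): with an 8 byte block
+ * size, 0x1e bytes yields a descriptor length of 0x18, and any
+ * rounded length above 64KB is clamped to 0x10000.
+ */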
+static inline int cipher_descriptor_len(int nbytes, unsigned int block_size)
+{
+       int this_len = nbytes;
+
+       this_len -= (nbytes & (block_size - 1));
+       return this_len > (1 << 16) ? (1 << 16) : this_len;
+}
+
+static int __n2_crypt_chunk(struct crypto_tfm *tfm, struct n2_crypto_chunk *cp,
+                           struct spu_queue *qp, bool encrypt)
+{
+       struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
+       struct cwq_initial_entry *ent;
+       bool in_place;
+       int i;
+
+       ent = spu_queue_alloc(qp, cp->arr_len);
+       if (!ent) {
+               pr_info("queue_alloc() of %d fails\n",
+                       cp->arr_len);
+               return -EBUSY;
+       }
+
+       in_place = (cp->dest_paddr == cp->arr[0].src_paddr);
+
+       ent->control = control_word_base(cp->arr[0].src_len,
+                                        0, ctx->enc_type, 0, 0,
+                                        false, true, false, encrypt,
+                                        OPCODE_ENCRYPT |
+                                        (in_place ? OPCODE_INPLACE_BIT : 0));
+       ent->src_addr = cp->arr[0].src_paddr;
+       ent->auth_key_addr = 0UL;
+       ent->auth_iv_addr = 0UL;
+       ent->final_auth_state_addr = 0UL;
+       ent->enc_key_addr = __pa(&ctx->key);
+       ent->enc_iv_addr = cp->iv_paddr;
+       ent->dest_addr = (in_place ? 0UL : cp->dest_paddr);
+
+       for (i = 1; i < cp->arr_len; i++) {
+               ent = spu_queue_next(qp, ent);
+
+               ent->control = cp->arr[i].src_len - 1;
+               ent->src_addr = cp->arr[i].src_paddr;
+               ent->auth_key_addr = 0UL;
+               ent->auth_iv_addr = 0UL;
+               ent->final_auth_state_addr = 0UL;
+               ent->enc_key_addr = 0UL;
+               ent->enc_iv_addr = 0UL;
+               ent->dest_addr = 0UL;
+       }
+       ent->control |= CONTROL_END_OF_BLOCK;
+
+       return (spu_queue_submit(qp, ent) != HV_EOK) ? -EINVAL : 0;
+}
+
+static int n2_compute_chunks(struct ablkcipher_request *req)
+{
+       struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+       struct ablkcipher_walk *walk = &rctx->walk;
+       struct n2_crypto_chunk *chunk;
+       unsigned long dest_prev;
+       unsigned int tot_len;
+       bool prev_in_place;
+       int err, nbytes;
+
+       ablkcipher_walk_init(walk, req->dst, req->src, req->nbytes);
+       err = ablkcipher_walk_phys(req, walk);
+       if (err)
+               return err;
+
+       INIT_LIST_HEAD(&rctx->chunk_list);
+
+       chunk = &rctx->chunk;
+       INIT_LIST_HEAD(&chunk->entry);
+
+       chunk->iv_paddr = 0UL;
+       chunk->arr_len = 0;
+       chunk->dest_paddr = 0UL;
+
+       prev_in_place = false;
+       dest_prev = ~0UL;
+       tot_len = 0;
+
+       while ((nbytes = walk->nbytes) != 0) {
+               unsigned long dest_paddr, src_paddr;
+               bool in_place;
+               int this_len;
+
+               src_paddr = (page_to_phys(walk->src.page) +
+                            walk->src.offset);
+               dest_paddr = (page_to_phys(walk->dst.page) +
+                             walk->dst.offset);
+               in_place = (src_paddr == dest_paddr);
+               this_len = cipher_descriptor_len(nbytes, walk->blocksize);
+
+               if (chunk->arr_len != 0) {
+                       if (in_place != prev_in_place ||
+                           (!prev_in_place &&
+                            dest_paddr != dest_prev) ||
+                           chunk->arr_len == N2_CHUNK_ARR_LEN ||
+                           tot_len + this_len > (1 << 16)) {
+                               chunk->dest_final = dest_prev;
+                               list_add_tail(&chunk->entry,
+                                             &rctx->chunk_list);
+                               chunk = kzalloc(sizeof(*chunk), GFP_ATOMIC);
+                               if (!chunk) {
+                                       err = -ENOMEM;
+                                       break;
+                               }
+                               INIT_LIST_HEAD(&chunk->entry);
+                       }
+               }
+               if (chunk->arr_len == 0) {
+                       chunk->dest_paddr = dest_paddr;
+                       tot_len = 0;
+               }
+               chunk->arr[chunk->arr_len].src_paddr = src_paddr;
+               chunk->arr[chunk->arr_len].src_len = this_len;
+               chunk->arr_len++;
+
+               dest_prev = dest_paddr + this_len;
+               prev_in_place = in_place;
+               tot_len += this_len;
+
+               err = ablkcipher_walk_done(req, walk, nbytes - this_len);
+               if (err)
+                       break;
+       }
+       if (!err && chunk->arr_len != 0) {
+               chunk->dest_final = dest_prev;
+               list_add_tail(&chunk->entry, &rctx->chunk_list);
+       }
+
+       return err;
+}
+
+static void n2_chunk_complete(struct ablkcipher_request *req, void *final_iv)
+{
+       struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+       struct n2_crypto_chunk *c, *tmp;
+
+       if (final_iv)
+               memcpy(rctx->walk.iv, final_iv, rctx->walk.blocksize);
+
+       ablkcipher_walk_complete(&rctx->walk);
+       list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) {
+               list_del(&c->entry);
+               if (unlikely(c != &rctx->chunk))
+                       kfree(c);
+       }
+}
+
+static int n2_do_ecb(struct ablkcipher_request *req, bool encrypt)
+{
+       struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+       struct crypto_tfm *tfm = req->base.tfm;
+       int err = n2_compute_chunks(req);
+       struct n2_crypto_chunk *c, *tmp;
+       unsigned long flags, hv_ret;
+       struct spu_queue *qp;
+
+       if (err)
+               return err;
+
+       qp = cpu_to_cwq[get_cpu()];
+       err = -ENODEV;
+       if (!qp)
+               goto out;
+
+       spin_lock_irqsave(&qp->lock, flags);
+
+       list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) {
+               err = __n2_crypt_chunk(tfm, c, qp, encrypt);
+               if (err)
+                       break;
+               list_del(&c->entry);
+               if (unlikely(c != &rctx->chunk))
+                       kfree(c);
+       }
+       if (!err) {
+               hv_ret = wait_for_tail(qp);
+               if (hv_ret != HV_EOK)
+                       err = -EINVAL;
+       }
+
+       spin_unlock_irqrestore(&qp->lock, flags);
+
+       put_cpu();
+
+out:
+       n2_chunk_complete(req, NULL);
+       return err;
+}
+
+static int n2_encrypt_ecb(struct ablkcipher_request *req)
+{
+       return n2_do_ecb(req, true);
+}
+
+static int n2_decrypt_ecb(struct ablkcipher_request *req)
+{
+       return n2_do_ecb(req, false);
+}
+
+static int n2_do_chaining(struct ablkcipher_request *req, bool encrypt)
+{
+       struct n2_request_context *rctx = ablkcipher_request_ctx(req);
+       struct crypto_tfm *tfm = req->base.tfm;
+       unsigned long flags, hv_ret, iv_paddr;
+       int err = n2_compute_chunks(req);
+       struct n2_crypto_chunk *c, *tmp;
+       struct spu_queue *qp;
+       void *final_iv_addr;
+
+       final_iv_addr = NULL;
+
+       if (err)
+               return err;
+
+       qp = cpu_to_cwq[get_cpu()];
+       err = -ENODEV;
+       if (!qp)
+               goto out;
+
+       spin_lock_irqsave(&qp->lock, flags);
+
+       if (encrypt) {
+               iv_paddr = __pa(rctx->walk.iv);
+               list_for_each_entry_safe(c, tmp, &rctx->chunk_list,
+                                        entry) {
+                       c->iv_paddr = iv_paddr;
+                       err = __n2_crypt_chunk(tfm, c, qp, true);
+                       if (err)
+                               break;
+                       iv_paddr = c->dest_final - rctx->walk.blocksize;
+                       list_del(&c->entry);
+                       if (unlikely(c != &rctx->chunk))
+                               kfree(c);
+               }
+               final_iv_addr = __va(iv_paddr);
+       } else {
+               list_for_each_entry_safe_reverse(c, tmp, &rctx->chunk_list,
+                                                entry) {
+                       if (c == &rctx->chunk) {
+                               iv_paddr = __pa(rctx->walk.iv);
+                       } else {
+                               iv_paddr = (tmp->arr[tmp->arr_len-1].src_paddr +
+                                           tmp->arr[tmp->arr_len-1].src_len -
+                                           rctx->walk.blocksize);
+                       }
+                       if (!final_iv_addr) {
+                               unsigned long pa;
+
+                               pa = (c->arr[c->arr_len-1].src_paddr +
+                                     c->arr[c->arr_len-1].src_len -
+                                     rctx->walk.blocksize);
+                               final_iv_addr = rctx->temp_iv;
+                               memcpy(rctx->temp_iv, __va(pa),
+                                      rctx->walk.blocksize);
+                       }
+                       c->iv_paddr = iv_paddr;
+                       err = __n2_crypt_chunk(tfm, c, qp, false);
+                       if (err)
+                               break;
+                       list_del(&c->entry);
+                       if (unlikely(c != &rctx->chunk))
+                               kfree(c);
+               }
+       }
+       if (!err) {
+               hv_ret = wait_for_tail(qp);
+               if (hv_ret != HV_EOK)
+                       err = -EINVAL;
+       }
+
+       spin_unlock_irqrestore(&qp->lock, flags);
+
+       put_cpu();
+
+out:
+       n2_chunk_complete(req, err ? NULL : final_iv_addr);
+       return err;
+}
+
+static int n2_encrypt_chaining(struct ablkcipher_request *req)
+{
+       return n2_do_chaining(req, true);
+}
+
+static int n2_decrypt_chaining(struct ablkcipher_request *req)
+{
+       return n2_do_chaining(req, false);
+}
+
+struct n2_cipher_tmpl {
+       const char              *name;
+       const char              *drv_name;
+       u8                      block_size;
+       u8                      enc_type;
+       struct ablkcipher_alg   ablkcipher;
+};
+
+static const struct n2_cipher_tmpl cipher_tmpls[] = {
+       /* ARC4: only ECB is supported (chaining bits ignored) */
+       {       .name           = "ecb(arc4)",
+               .drv_name       = "ecb-arc4",
+               .block_size     = 1,
+               .enc_type       = (ENC_TYPE_ALG_RC4_STREAM |
+                                  ENC_TYPE_CHAINING_ECB),
+               .ablkcipher     = {
+                       .min_keysize    = 1,
+                       .max_keysize    = 256,
+                       .setkey         = n2_arc4_setkey,
+                       .encrypt        = n2_encrypt_ecb,
+                       .decrypt        = n2_decrypt_ecb,
+               },
+       },
+
+       /* DES: ECB CBC and CFB are supported */
+       {       .name           = "ecb(des)",
+               .drv_name       = "ecb-des",
+               .block_size     = DES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_DES |
+                                  ENC_TYPE_CHAINING_ECB),
+               .ablkcipher     = {
+                       .min_keysize    = DES_KEY_SIZE,
+                       .max_keysize    = DES_KEY_SIZE,
+                       .setkey         = n2_des_setkey,
+                       .encrypt        = n2_encrypt_ecb,
+                       .decrypt        = n2_decrypt_ecb,
+               },
+       },
+       {       .name           = "cbc(des)",
+               .drv_name       = "cbc-des",
+               .block_size     = DES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_DES |
+                                  ENC_TYPE_CHAINING_CBC),
+               .ablkcipher     = {
+                       .ivsize         = DES_BLOCK_SIZE,
+                       .min_keysize    = DES_KEY_SIZE,
+                       .max_keysize    = DES_KEY_SIZE,
+                       .setkey         = n2_des_setkey,
+                       .encrypt        = n2_encrypt_chaining,
+                       .decrypt        = n2_decrypt_chaining,
+               },
+       },
+       {       .name           = "cfb(des)",
+               .drv_name       = "cfb-des",
+               .block_size     = DES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_DES |
+                                  ENC_TYPE_CHAINING_CFB),
+               .ablkcipher     = {
+                       .min_keysize    = DES_KEY_SIZE,
+                       .max_keysize    = DES_KEY_SIZE,
+                       .setkey         = n2_des_setkey,
+                       .encrypt        = n2_encrypt_chaining,
+                       .decrypt        = n2_decrypt_chaining,
+               },
+       },
+
+       /* 3DES: ECB CBC and CFB are supported */
+       {       .name           = "ecb(des3_ede)",
+               .drv_name       = "ecb-3des",
+               .block_size     = DES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_3DES |
+                                  ENC_TYPE_CHAINING_ECB),
+               .ablkcipher     = {
+                       .min_keysize    = 3 * DES_KEY_SIZE,
+                       .max_keysize    = 3 * DES_KEY_SIZE,
+                       .setkey         = n2_3des_setkey,
+                       .encrypt        = n2_encrypt_ecb,
+                       .decrypt        = n2_decrypt_ecb,
+               },
+       },
+       {       .name           = "cbc(des3_ede)",
+               .drv_name       = "cbc-3des",
+               .block_size     = DES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_3DES |
+                                  ENC_TYPE_CHAINING_CBC),
+               .ablkcipher     = {
+                       .ivsize         = DES_BLOCK_SIZE,
+                       .min_keysize    = 3 * DES_KEY_SIZE,
+                       .max_keysize    = 3 * DES_KEY_SIZE,
+                       .setkey         = n2_3des_setkey,
+                       .encrypt        = n2_encrypt_chaining,
+                       .decrypt        = n2_decrypt_chaining,
+               },
+       },
+       {       .name           = "cfb(des3_ede)",
+               .drv_name       = "cfb-3des",
+               .block_size     = DES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_3DES |
+                                  ENC_TYPE_CHAINING_CFB),
+               .ablkcipher     = {
+                       .min_keysize    = 3 * DES_KEY_SIZE,
+                       .max_keysize    = 3 * DES_KEY_SIZE,
+                       .setkey         = n2_3des_setkey,
+                       .encrypt        = n2_encrypt_chaining,
+                       .decrypt        = n2_decrypt_chaining,
+               },
+       },
+       /* AES: ECB CBC and CTR are supported */
+       {       .name           = "ecb(aes)",
+               .drv_name       = "ecb-aes",
+               .block_size     = AES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_AES128 |
+                                  ENC_TYPE_CHAINING_ECB),
+               .ablkcipher     = {
+                       .min_keysize    = AES_MIN_KEY_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE,
+                       .setkey         = n2_aes_setkey,
+                       .encrypt        = n2_encrypt_ecb,
+                       .decrypt        = n2_decrypt_ecb,
+               },
+       },
+       {       .name           = "cbc(aes)",
+               .drv_name       = "cbc-aes",
+               .block_size     = AES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_AES128 |
+                                  ENC_TYPE_CHAINING_CBC),
+               .ablkcipher     = {
+                       .ivsize         = AES_BLOCK_SIZE,
+                       .min_keysize    = AES_MIN_KEY_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE,
+                       .setkey         = n2_aes_setkey,
+                       .encrypt        = n2_encrypt_chaining,
+                       .decrypt        = n2_decrypt_chaining,
+               },
+       },
+       {       .name           = "ctr(aes)",
+               .drv_name       = "ctr-aes",
+               .block_size     = AES_BLOCK_SIZE,
+               .enc_type       = (ENC_TYPE_ALG_AES128 |
+                                  ENC_TYPE_CHAINING_COUNTER),
+               .ablkcipher     = {
+                       .ivsize         = AES_BLOCK_SIZE,
+                       .min_keysize    = AES_MIN_KEY_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE,
+                       .setkey         = n2_aes_setkey,
+                       .encrypt        = n2_encrypt_chaining,
+                       .decrypt        = n2_encrypt_chaining,
+               },
+       },
+
+};
+#define NUM_CIPHER_TMPLS ARRAY_SIZE(cipher_tmpls)
+
+static LIST_HEAD(cipher_algs);
+
+struct n2_hash_tmpl {
+       const char      *name;
+       int             (*digest)(struct ahash_request *req);
+       u8              digest_size;
+       u8              block_size;
+};
+static const struct n2_hash_tmpl hash_tmpls[] = {
+       { .name         = "md5",
+         .digest       = n2_md5_async_digest,
+         .digest_size  = MD5_DIGEST_SIZE,
+         .block_size   = MD5_HMAC_BLOCK_SIZE },
+       { .name         = "sha1",
+         .digest       = n2_sha1_async_digest,
+         .digest_size  = SHA1_DIGEST_SIZE,
+         .block_size   = SHA1_BLOCK_SIZE },
+       { .name         = "sha256",
+         .digest       = n2_sha256_async_digest,
+         .digest_size  = SHA256_DIGEST_SIZE,
+         .block_size   = SHA256_BLOCK_SIZE },
+       { .name         = "sha224",
+         .digest       = n2_sha224_async_digest,
+         .digest_size  = SHA224_DIGEST_SIZE,
+         .block_size   = SHA224_BLOCK_SIZE },
+};
+#define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls)
+
+struct n2_ahash_alg {
+       struct list_head        entry;
+       struct ahash_alg        alg;
+};
+static LIST_HEAD(ahash_algs);
+
+static int algs_registered;
+
+static void __n2_unregister_algs(void)
+{
+       struct n2_cipher_alg *cipher, *cipher_tmp;
+       struct n2_ahash_alg *alg, *alg_tmp;
+
+       list_for_each_entry_safe(cipher, cipher_tmp, &cipher_algs, entry) {
+               crypto_unregister_alg(&cipher->alg);
+               list_del(&cipher->entry);
+               kfree(cipher);
+       }
+       list_for_each_entry_safe(alg, alg_tmp, &ahash_algs, entry) {
+               crypto_unregister_ahash(&alg->alg);
+               list_del(&alg->entry);
+               kfree(alg);
+       }
+}
+
+static int n2_cipher_cra_init(struct crypto_tfm *tfm)
+{
+       tfm->crt_ablkcipher.reqsize = sizeof(struct n2_request_context);
+       return 0;
+}
+
+static int __devinit __n2_register_one_cipher(const struct n2_cipher_tmpl *tmpl)
+{
+       struct n2_cipher_alg *p = kzalloc(sizeof(*p), GFP_KERNEL);
+       struct crypto_alg *alg;
+       int err;
+
+       if (!p)
+               return -ENOMEM;
+
+       alg = &p->alg;
+
+       snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
+       snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->drv_name);
+       alg->cra_priority = N2_CRA_PRIORITY;
+       alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
+       alg->cra_blocksize = tmpl->block_size;
+       p->enc_type = tmpl->enc_type;
+       alg->cra_ctxsize = sizeof(struct n2_cipher_context);
+       alg->cra_type = &crypto_ablkcipher_type;
+       alg->cra_u.ablkcipher = tmpl->ablkcipher;
+       alg->cra_init = n2_cipher_cra_init;
+       alg->cra_module = THIS_MODULE;
+
+       list_add(&p->entry, &cipher_algs);
+       err = crypto_register_alg(alg);
+       if (err) {
+               list_del(&p->entry);
+               kfree(p);
+       }
+       return err;
+}
+
+static int __devinit __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
+{
+       struct n2_ahash_alg *p = kzalloc(sizeof(*p), GFP_KERNEL);
+       struct hash_alg_common *halg;
+       struct crypto_alg *base;
+       struct ahash_alg *ahash;
+       int err;
+
+       if (!p)
+               return -ENOMEM;
+
+       ahash = &p->alg;
+       ahash->init = n2_hash_async_init;
+       ahash->update = n2_hash_async_update;
+       ahash->final = n2_hash_async_final;
+       ahash->finup = n2_hash_async_finup;
+       ahash->digest = tmpl->digest;
+
+       halg = &ahash->halg;
+       halg->digestsize = tmpl->digest_size;
+
+       base = &halg->base;
+       snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
+       snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->name);
+       base->cra_priority = N2_CRA_PRIORITY;
+       base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK;
+       base->cra_blocksize = tmpl->block_size;
+       base->cra_ctxsize = sizeof(struct n2_hash_ctx);
+       base->cra_module = THIS_MODULE;
+       base->cra_init = n2_hash_cra_init;
+       base->cra_exit = n2_hash_cra_exit;
+
+       list_add(&p->entry, &ahash_algs);
+       err = crypto_register_ahash(ahash);
+       if (err) {
+               list_del(&p->entry);
+               kfree(p);
+       }
+       return err;
+}
+
+static int __devinit n2_register_algs(void)
+{
+       int i, err = 0;
+
+       mutex_lock(&spu_lock);
+       if (algs_registered++)
+               goto out;
+
+       for (i = 0; i < NUM_HASH_TMPLS; i++) {
+               err = __n2_register_one_ahash(&hash_tmpls[i]);
+               if (err) {
+                       __n2_unregister_algs();
+                       goto out;
+               }
+       }
+       for (i = 0; i < NUM_CIPHER_TMPLS; i++) {
+               err = __n2_register_one_cipher(&cipher_tmpls[i]);
+               if (err) {
+                       __n2_unregister_algs();
+                       goto out;
+               }
+       }
+
+out:
+       mutex_unlock(&spu_lock);
+       return err;
+}
+
+static void __exit n2_unregister_algs(void)
+{
+       mutex_lock(&spu_lock);
+       if (!--algs_registered)
+               __n2_unregister_algs();
+       mutex_unlock(&spu_lock);
+}
+
+/* To map CWQ queues to interrupt sources, the hypervisor API provides
+ * a devino.  This isn't very useful to us because all of the
+ * interrupts listed in the of_device node have been translated to
+ * Linux virtual IRQ cookie numbers.
+ *
+ * So we have to back-translate, going through the 'intr' and 'ino'
+ * property tables of the n2cp MDESC node and matching them with the
+ * OF 'interrupts' property entries, in order to figure out which
+ * devino goes to which already-translated IRQ.
+ */
+static int find_devino_index(struct of_device *dev, struct spu_mdesc_info *ip,
+                            unsigned long dev_ino)
+{
+       const unsigned int *dev_intrs;
+       unsigned int intr;
+       int i;
+
+       for (i = 0; i < ip->num_intrs; i++) {
+               if (ip->ino_table[i].ino == dev_ino)
+                       break;
+       }
+       if (i == ip->num_intrs)
+               return -ENODEV;
+
+       intr = ip->ino_table[i].intr;
+
+       dev_intrs = of_get_property(dev->node, "interrupts", NULL);
+       if (!dev_intrs)
+               return -ENODEV;
+
+       for (i = 0; i < dev->num_irqs; i++) {
+               if (dev_intrs[i] == intr)
+                       return i;
+       }
+
+       return -ENODEV;
+}
+
+static int spu_map_ino(struct of_device *dev, struct spu_mdesc_info *ip,
+                      const char *irq_name, struct spu_queue *p,
+                      irq_handler_t handler)
+{
+       unsigned long herr;
+       int index;
+
+       herr = sun4v_ncs_qhandle_to_devino(p->qhandle, &p->devino);
+       if (herr)
+               return -EINVAL;
+
+       index = find_devino_index(dev, ip, p->devino);
+       if (index < 0)
+               return index;
+
+       p->irq = dev->irqs[index];
+
+       sprintf(p->irq_name, "%s-%d", irq_name, index);
+
+       return request_irq(p->irq, handler, IRQF_SAMPLE_RANDOM,
+                          p->irq_name, p);
+}
+
+static struct kmem_cache *queue_cache[2];
+
+static void *new_queue(unsigned long q_type)
+{
+       return kmem_cache_zalloc(queue_cache[q_type - 1], GFP_KERNEL);
+}
+
+static void free_queue(void *p, unsigned long q_type)
+{
+       return kmem_cache_free(queue_cache[q_type - 1], p);
+}
+
+static int queue_cache_init(void)
+{
+       if (!queue_cache[HV_NCS_QTYPE_MAU - 1])
+               queue_cache[HV_NCS_QTYPE_MAU - 1] =
+                       kmem_cache_create("mau_queue",
+                                         (MAU_NUM_ENTRIES *
+                                          MAU_ENTRY_SIZE),
+                                         MAU_ENTRY_SIZE, 0, NULL);
+       if (!queue_cache[HV_NCS_QTYPE_MAU - 1])
+               return -ENOMEM;
+
+       if (!queue_cache[HV_NCS_QTYPE_CWQ - 1])
+               queue_cache[HV_NCS_QTYPE_CWQ - 1] =
+                       kmem_cache_create("cwq_queue",
+                                         (CWQ_NUM_ENTRIES *
+                                          CWQ_ENTRY_SIZE),
+                                         CWQ_ENTRY_SIZE, 0, NULL);
+       if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
+               kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+static void queue_cache_destroy(void)
+{
+       kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+       kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+}
+
+static int spu_queue_register(struct spu_queue *p, unsigned long q_type)
+{
+       cpumask_var_t old_allowed;
+       unsigned long hv_ret;
+
+       if (cpumask_empty(&p->sharing))
+               return -EINVAL;
+
+       if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
+               return -ENOMEM;
+
+       cpumask_copy(old_allowed, &current->cpus_allowed);
+
+       set_cpus_allowed_ptr(current, &p->sharing);
+
+       hv_ret = sun4v_ncs_qconf(q_type, __pa(p->q),
+                                CWQ_NUM_ENTRIES, &p->qhandle);
+       if (!hv_ret)
+               sun4v_ncs_sethead_marker(p->qhandle, 0);
+
+       set_cpus_allowed_ptr(current, old_allowed);
+
+       free_cpumask_var(old_allowed);
+
+       return (hv_ret ? -EINVAL : 0);
+}
+
+static int spu_queue_setup(struct spu_queue *p)
+{
+       int err;
+
+       p->q = new_queue(p->q_type);
+       if (!p->q)
+               return -ENOMEM;
+
+       err = spu_queue_register(p, p->q_type);
+       if (err) {
+               free_queue(p->q, p->q_type);
+               p->q = NULL;
+       }
+
+       return err;
+}
+
+static void spu_queue_destroy(struct spu_queue *p)
+{
+       unsigned long hv_ret;
+
+       if (!p->q)
+               return;
+
+       hv_ret = sun4v_ncs_qconf(p->q_type, p->qhandle, 0, &p->qhandle);
+
+       if (!hv_ret)
+               free_queue(p->q, p->q_type);
+}
+
+static void spu_list_destroy(struct list_head *list)
+{
+       struct spu_queue *p, *n;
+
+       list_for_each_entry_safe(p, n, list, list) {
+               int i;
+
+               for (i = 0; i < NR_CPUS; i++) {
+                       if (cpu_to_cwq[i] == p)
+                               cpu_to_cwq[i] = NULL;
+               }
+
+               if (p->irq) {
+                       free_irq(p->irq, p);
+                       p->irq = 0;
+               }
+               spu_queue_destroy(p);
+               list_del(&p->list);
+               kfree(p);
+       }
+}
+
+/* Walk the backward arcs of a CWQ 'exec-unit' node,
+ * gathering cpu membership information.
+ */
+static int spu_mdesc_walk_arcs(struct mdesc_handle *mdesc,
+                              struct of_device *dev,
+                              u64 node, struct spu_queue *p,
+                              struct spu_queue **table)
+{
+       u64 arc;
+
+       mdesc_for_each_arc(arc, mdesc, node, MDESC_ARC_TYPE_BACK) {
+               u64 tgt = mdesc_arc_target(mdesc, arc);
+               const char *name = mdesc_node_name(mdesc, tgt);
+               const u64 *id;
+
+               if (strcmp(name, "cpu"))
+                       continue;
+               id = mdesc_get_property(mdesc, tgt, "id", NULL);
+               if (table[*id] != NULL) {
+                       dev_err(&dev->dev, "%s: SPU cpu slot already set.\n",
+                               dev->node->full_name);
+                       return -EINVAL;
+               }
+               cpu_set(*id, p->sharing);
+               table[*id] = p;
+       }
+       return 0;
+}
+
+/* Process an 'exec-unit' MDESC node of type 'cwq'.  */
+static int handle_exec_unit(struct spu_mdesc_info *ip, struct list_head *list,
+                           struct of_device *dev, struct mdesc_handle *mdesc,
+                           u64 node, const char *iname, unsigned long q_type,
+                           irq_handler_t handler, struct spu_queue **table)
+{
+       struct spu_queue *p;
+       int err;
+
+       p = kzalloc(sizeof(struct spu_queue), GFP_KERNEL);
+       if (!p) {
+               dev_err(&dev->dev, "%s: Could not allocate SPU queue.\n",
+                       dev->node->full_name);
+               return -ENOMEM;
+       }
+
+       cpus_clear(p->sharing);
+       spin_lock_init(&p->lock);
+       p->q_type = q_type;
+       INIT_LIST_HEAD(&p->jobs);
+       list_add(&p->list, list);
+
+       err = spu_mdesc_walk_arcs(mdesc, dev, node, p, table);
+       if (err)
+               return err;
+
+       err = spu_queue_setup(p);
+       if (err)
+               return err;
+
+       return spu_map_ino(dev, ip, iname, p, handler);
+}
+
+static int spu_mdesc_scan(struct mdesc_handle *mdesc, struct of_device *dev,
+                         struct spu_mdesc_info *ip, struct list_head *list,
+                         const char *exec_name, unsigned long q_type,
+                         irq_handler_t handler, struct spu_queue **table)
+{
+       int err = 0;
+       u64 node;
+
+       mdesc_for_each_node_by_name(mdesc, node, "exec-unit") {
+               const char *type;
+
+               type = mdesc_get_property(mdesc, node, "type", NULL);
+               if (!type || strcmp(type, exec_name))
+                       continue;
+
+               err = handle_exec_unit(ip, list, dev, mdesc, node,
+                                      exec_name, q_type, handler, table);
+               if (err) {
+                       spu_list_destroy(list);
+                       break;
+               }
+       }
+
+       return err;
+}
+
+static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node,
+                                  struct spu_mdesc_info *ip)
+{
+       const u64 *intr, *ino;
+       int intr_len, ino_len;
+       int i;
+
+       intr = mdesc_get_property(mdesc, node, "intr", &intr_len);
+       if (!intr)
+               return -ENODEV;
+
+       ino = mdesc_get_property(mdesc, node, "ino", &ino_len);
+       if (!ino)
+               return -ENODEV;
+
+       if (intr_len != ino_len)
+               return -EINVAL;
+
+       ip->num_intrs = intr_len / sizeof(u64);
+       ip->ino_table = kzalloc((sizeof(struct ino_blob) *
+                                ip->num_intrs),
+                               GFP_KERNEL);
+       if (!ip->ino_table)
+               return -ENOMEM;
+
+       for (i = 0; i < ip->num_intrs; i++) {
+               struct ino_blob *b = &ip->ino_table[i];
+               b->intr = intr[i];
+               b->ino = ino[i];
+       }
+
+       return 0;
+}
+
+static int __devinit grab_mdesc_irq_props(struct mdesc_handle *mdesc,
+                                         struct of_device *dev,
+                                         struct spu_mdesc_info *ip,
+                                         const char *node_name)
+{
+       const unsigned int *reg;
+       u64 node;
+
+       reg = of_get_property(dev->node, "reg", NULL);
+       if (!reg)
+               return -ENODEV;
+
+       mdesc_for_each_node_by_name(mdesc, node, "virtual-device") {
+               const char *name;
+               const u64 *chdl;
+
+               name = mdesc_get_property(mdesc, node, "name", NULL);
+               if (!name || strcmp(name, node_name))
+                       continue;
+               chdl = mdesc_get_property(mdesc, node, "cfg-handle", NULL);
+               if (!chdl || (*chdl != *reg))
+                       continue;
+               ip->cfg_handle = *chdl;
+               return get_irq_props(mdesc, node, ip);
+       }
+
+       return -ENODEV;
+}
+
+static unsigned long n2_spu_hvapi_major;
+static unsigned long n2_spu_hvapi_minor;
+
+static int __devinit n2_spu_hvapi_register(void)
+{
+       int err;
+
+       n2_spu_hvapi_major = 2;
+       n2_spu_hvapi_minor = 0;
+
+       err = sun4v_hvapi_register(HV_GRP_NCS,
+                                  n2_spu_hvapi_major,
+                                  &n2_spu_hvapi_minor);
+
+       if (!err)
+               pr_info("Registered NCS HVAPI version %lu.%lu\n",
+                       n2_spu_hvapi_major,
+                       n2_spu_hvapi_minor);
+
+       return err;
+}
+
+static void n2_spu_hvapi_unregister(void)
+{
+       sun4v_hvapi_unregister(HV_GRP_NCS);
+}
+
+static int global_ref;
+
+static int __devinit grab_global_resources(void)
+{
+       int err = 0;
+
+       mutex_lock(&spu_lock);
+
+       if (global_ref++)
+               goto out;
+
+       err = n2_spu_hvapi_register();
+       if (err)
+               goto out;
+
+       err = queue_cache_init();
+       if (err)
+               goto out_hvapi_release;
+
+       err = -ENOMEM;
+       cpu_to_cwq = kzalloc(sizeof(struct spu_queue *) * NR_CPUS,
+                            GFP_KERNEL);
+       if (!cpu_to_cwq)
+               goto out_queue_cache_destroy;
+
+       cpu_to_mau = kzalloc(sizeof(struct spu_queue *) * NR_CPUS,
+                            GFP_KERNEL);
+       if (!cpu_to_mau)
+               goto out_free_cwq_table;
+
+       err = 0;
+
+out:
+       if (err)
+               global_ref--;
+       mutex_unlock(&spu_lock);
+       return err;
+
+out_free_cwq_table:
+       kfree(cpu_to_cwq);
+       cpu_to_cwq = NULL;
+
+out_queue_cache_destroy:
+       queue_cache_destroy();
+
+out_hvapi_release:
+       n2_spu_hvapi_unregister();
+       goto out;
+}
+
+static void release_global_resources(void)
+{
+       mutex_lock(&spu_lock);
+       if (!--global_ref) {
+               kfree(cpu_to_cwq);
+               cpu_to_cwq = NULL;
+
+               kfree(cpu_to_mau);
+               cpu_to_mau = NULL;
+
+               queue_cache_destroy();
+               n2_spu_hvapi_unregister();
+       }
+       mutex_unlock(&spu_lock);
+}
+
+static struct n2_crypto * __devinit alloc_n2cp(void)
+{
+       struct n2_crypto *np = kzalloc(sizeof(struct n2_crypto), GFP_KERNEL);
+
+       if (np)
+               INIT_LIST_HEAD(&np->cwq_list);
+
+       return np;
+}
+
+static void free_n2cp(struct n2_crypto *np)
+{
+       if (np->cwq_info.ino_table) {
+               kfree(np->cwq_info.ino_table);
+               np->cwq_info.ino_table = NULL;
+       }
+
+       kfree(np);
+}
+
+static void __devinit n2_spu_driver_version(void)
+{
+       static int n2_spu_version_printed;
+
+       if (n2_spu_version_printed++ == 0)
+               pr_info("%s", version);
+}
+
+static int __devinit n2_crypto_probe(struct of_device *dev,
+                                    const struct of_device_id *match)
+{
+       struct mdesc_handle *mdesc;
+       const char *full_name;
+       struct n2_crypto *np;
+       int err;
+
+       n2_spu_driver_version();
+
+       full_name = dev->node->full_name;
+       pr_info("Found N2CP at %s\n", full_name);
+
+       np = alloc_n2cp();
+       if (!np) {
+               dev_err(&dev->dev, "%s: Unable to allocate n2cp.\n",
+                       full_name);
+               return -ENOMEM;
+       }
+
+       err = grab_global_resources();
+       if (err) {
+               dev_err(&dev->dev, "%s: Unable to grab "
+                       "global resources.\n", full_name);
+               goto out_free_n2cp;
+       }
+
+       mdesc = mdesc_grab();
+
+       if (!mdesc) {
+               dev_err(&dev->dev, "%s: Unable to grab MDESC.\n",
+                       full_name);
+               err = -ENODEV;
+               goto out_free_global;
+       }
+       err = grab_mdesc_irq_props(mdesc, dev, &np->cwq_info, "n2cp");
+       if (err) {
+               dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n",
+                       full_name);
+               mdesc_release(mdesc);
+               goto out_free_global;
+       }
+
+       err = spu_mdesc_scan(mdesc, dev, &np->cwq_info, &np->cwq_list,
+                            "cwq", HV_NCS_QTYPE_CWQ, cwq_intr,
+                            cpu_to_cwq);
+       mdesc_release(mdesc);
+
+       if (err) {
+               dev_err(&dev->dev, "%s: CWQ MDESC scan failed.\n",
+                       full_name);
+               goto out_free_global;
+       }
+
+       err = n2_register_algs();
+       if (err) {
+               dev_err(&dev->dev, "%s: Unable to register algorithms.\n",
+                       full_name);
+               goto out_free_spu_list;
+       }
+
+       dev_set_drvdata(&dev->dev, np);
+
+       return 0;
+
+out_free_spu_list:
+       spu_list_destroy(&np->cwq_list);
+
+out_free_global:
+       release_global_resources();
+
+out_free_n2cp:
+       free_n2cp(np);
+
+       return err;
+}
+
+static int __devexit n2_crypto_remove(struct of_device *dev)
+{
+       struct n2_crypto *np = dev_get_drvdata(&dev->dev);
+
+       n2_unregister_algs();
+
+       spu_list_destroy(&np->cwq_list);
+
+       release_global_resources();
+
+       free_n2cp(np);
+
+       return 0;
+}
+
+static struct n2_mau * __devinit alloc_ncp(void)
+{
+       struct n2_mau *mp = kzalloc(sizeof(struct n2_mau), GFP_KERNEL);
+
+       if (mp)
+               INIT_LIST_HEAD(&mp->mau_list);
+
+       return mp;
+}
+
+static void free_ncp(struct n2_mau *mp)
+{
+       if (mp->mau_info.ino_table) {
+               kfree(mp->mau_info.ino_table);
+               mp->mau_info.ino_table = NULL;
+       }
+
+       kfree(mp);
+}
+
+static int __devinit n2_mau_probe(struct of_device *dev,
+                                    const struct of_device_id *match)
+{
+       struct mdesc_handle *mdesc;
+       const char *full_name;
+       struct n2_mau *mp;
+       int err;
+
+       n2_spu_driver_version();
+
+       full_name = dev->node->full_name;
+       pr_info("Found NCP at %s\n", full_name);
+
+       mp = alloc_ncp();
+       if (!mp) {
+               dev_err(&dev->dev, "%s: Unable to allocate ncp.\n",
+                       full_name);
+               return -ENOMEM;
+       }
+
+       err = grab_global_resources();
+       if (err) {
+               dev_err(&dev->dev, "%s: Unable to grab "
+                       "global resources.\n", full_name);
+               goto out_free_ncp;
+       }
+
+       mdesc = mdesc_grab();
+
+       if (!mdesc) {
+               dev_err(&dev->dev, "%s: Unable to grab MDESC.\n",
+                       full_name);
+               err = -ENODEV;
+               goto out_free_global;
+       }
+
+       err = grab_mdesc_irq_props(mdesc, dev, &mp->mau_info, "ncp");
+       if (err) {
+               dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n",
+                       full_name);
+               mdesc_release(mdesc);
+               goto out_free_global;
+       }
+
+       err = spu_mdesc_scan(mdesc, dev, &mp->mau_info, &mp->mau_list,
+                            "mau", HV_NCS_QTYPE_MAU, mau_intr,
+                            cpu_to_mau);
+       mdesc_release(mdesc);
+
+       if (err) {
+               dev_err(&dev->dev, "%s: MAU MDESC scan failed.\n",
+                       full_name);
+               goto out_free_global;
+       }
+
+       dev_set_drvdata(&dev->dev, mp);
+
+       return 0;
+
+out_free_global:
+       release_global_resources();
+
+out_free_ncp:
+       free_ncp(mp);
+
+       return err;
+}
+
+static int __devexit n2_mau_remove(struct of_device *dev)
+{
+       struct n2_mau *mp = dev_get_drvdata(&dev->dev);
+
+       spu_list_destroy(&mp->mau_list);
+
+       release_global_resources();
+
+       free_ncp(mp);
+
+       return 0;
+}
+
+static struct of_device_id n2_crypto_match[] = {
+       {
+               .name = "n2cp",
+               .compatible = "SUNW,n2-cwq",
+       },
+       {
+               .name = "n2cp",
+               .compatible = "SUNW,vf-cwq",
+       },
+       {},
+};
+
+MODULE_DEVICE_TABLE(of, n2_crypto_match);
+
+static struct of_platform_driver n2_crypto_driver = {
+       .name           =       "n2cp",
+       .match_table    =       n2_crypto_match,
+       .probe          =       n2_crypto_probe,
+       .remove         =       __devexit_p(n2_crypto_remove),
+};
+
+static struct of_device_id n2_mau_match[] = {
+       {
+               .name = "ncp",
+               .compatible = "SUNW,n2-mau",
+       },
+       {
+               .name = "ncp",
+               .compatible = "SUNW,vf-mau",
+       },
+       {},
+};
+
+MODULE_DEVICE_TABLE(of, n2_mau_match);
+
+static struct of_platform_driver n2_mau_driver = {
+       .name           =       "ncp",
+       .match_table    =       n2_mau_match,
+       .probe          =       n2_mau_probe,
+       .remove         =       __devexit_p(n2_mau_remove),
+};
+
+static int __init n2_init(void)
+{
+       int err = of_register_driver(&n2_crypto_driver, &of_bus_type);
+
+       if (!err) {
+               err = of_register_driver(&n2_mau_driver, &of_bus_type);
+               if (err)
+                       of_unregister_driver(&n2_crypto_driver);
+       }
+       return err;
+}
+
+static void __exit n2_exit(void)
+{
+       of_unregister_driver(&n2_mau_driver);
+       of_unregister_driver(&n2_crypto_driver);
+}
+
+module_init(n2_init);
+module_exit(n2_exit);
diff --git a/drivers/crypto/n2_core.h b/drivers/crypto/n2_core.h
new file mode 100644 (file)
index 0000000..4bcbbea
--- /dev/null
@@ -0,0 +1,231 @@
+#ifndef _N2_CORE_H
+#define _N2_CORE_H
+
+#ifndef __ASSEMBLY__
+
+struct ino_blob {
+       u64                     intr;
+       u64                     ino;
+};
+
+struct spu_mdesc_info {
+       u64                     cfg_handle;
+       struct ino_blob         *ino_table;
+       int                     num_intrs;
+};
+
+struct n2_crypto {
+       struct spu_mdesc_info   cwq_info;
+       struct list_head        cwq_list;
+};
+
+struct n2_mau {
+       struct spu_mdesc_info   mau_info;
+       struct list_head        mau_list;
+};
+
+#define CWQ_ENTRY_SIZE         64
+#define CWQ_NUM_ENTRIES                64
+
+#define MAU_ENTRY_SIZE         64
+#define MAU_NUM_ENTRIES                64
+
+struct cwq_initial_entry {
+       u64                     control;
+       u64                     src_addr;
+       u64                     auth_key_addr;
+       u64                     auth_iv_addr;
+       u64                     final_auth_state_addr;
+       u64                     enc_key_addr;
+       u64                     enc_iv_addr;
+       u64                     dest_addr;
+};
+
+struct cwq_ext_entry {
+       u64                     len;
+       u64                     src_addr;
+       u64                     resv1;
+       u64                     resv2;
+       u64                     resv3;
+       u64                     resv4;
+       u64                     resv5;
+       u64                     resv6;
+};
+
+struct cwq_final_entry {
+       u64                     control;
+       u64                     src_addr;
+       u64                     resv1;
+       u64                     resv2;
+       u64                     resv3;
+       u64                     resv4;
+       u64                     resv5;
+       u64                     resv6;
+};
+
+#define CONTROL_LEN                    0x000000000000ffffULL
+#define CONTROL_LEN_SHIFT              0
+#define CONTROL_HMAC_KEY_LEN           0x0000000000ff0000ULL
+#define CONTROL_HMAC_KEY_LEN_SHIFT     16
+#define CONTROL_ENC_TYPE               0x00000000ff000000ULL
+#define CONTROL_ENC_TYPE_SHIFT         24
+#define  ENC_TYPE_ALG_RC4_STREAM       0x00ULL
+#define  ENC_TYPE_ALG_RC4_NOSTREAM     0x04ULL
+#define  ENC_TYPE_ALG_DES              0x08ULL
+#define  ENC_TYPE_ALG_3DES             0x0cULL
+#define  ENC_TYPE_ALG_AES128           0x10ULL
+#define  ENC_TYPE_ALG_AES192           0x14ULL
+#define  ENC_TYPE_ALG_AES256           0x18ULL
+#define  ENC_TYPE_ALG_RESERVED         0x1cULL
+#define  ENC_TYPE_ALG_MASK             0x1cULL
+#define  ENC_TYPE_CHAINING_ECB         0x00ULL
+#define  ENC_TYPE_CHAINING_CBC         0x01ULL
+#define  ENC_TYPE_CHAINING_CFB         0x02ULL
+#define  ENC_TYPE_CHAINING_COUNTER     0x03ULL
+#define  ENC_TYPE_CHAINING_MASK                0x03ULL
+#define CONTROL_AUTH_TYPE              0x0000001f00000000ULL
+#define CONTROL_AUTH_TYPE_SHIFT                32
+#define  AUTH_TYPE_RESERVED            0x00ULL
+#define  AUTH_TYPE_MD5                 0x01ULL
+#define  AUTH_TYPE_SHA1                        0x02ULL
+#define  AUTH_TYPE_SHA256              0x03ULL
+#define  AUTH_TYPE_CRC32               0x04ULL
+#define  AUTH_TYPE_HMAC_MD5            0x05ULL
+#define  AUTH_TYPE_HMAC_SHA1           0x06ULL
+#define  AUTH_TYPE_HMAC_SHA256         0x07ULL
+#define  AUTH_TYPE_TCP_CHECKSUM                0x08ULL
+#define  AUTH_TYPE_SSL_HMAC_MD5                0x09ULL
+#define  AUTH_TYPE_SSL_HMAC_SHA1       0x0aULL
+#define  AUTH_TYPE_SSL_HMAC_SHA256     0x0bULL
+#define CONTROL_STRAND                 0x000000e000000000ULL
+#define CONTROL_STRAND_SHIFT           37
+#define CONTROL_HASH_LEN               0x0000ff0000000000ULL
+#define CONTROL_HASH_LEN_SHIFT         40
+#define CONTROL_INTERRUPT              0x0001000000000000ULL
+#define CONTROL_STORE_FINAL_AUTH_STATE 0x0002000000000000ULL
+#define CONTROL_RESERVED               0x001c000000000000ULL
+#define CONTROL_HV_DONE                        0x0004000000000000ULL
+#define CONTROL_HV_PROTOCOL_ERROR      0x0008000000000000ULL
+#define CONTROL_HV_HARDWARE_ERROR      0x0010000000000000ULL
+#define CONTROL_END_OF_BLOCK           0x0020000000000000ULL
+#define CONTROL_START_OF_BLOCK         0x0040000000000000ULL
+#define CONTROL_ENCRYPT                        0x0080000000000000ULL
+#define CONTROL_OPCODE                 0xff00000000000000ULL
+#define CONTROL_OPCODE_SHIFT           56
+#define  OPCODE_INPLACE_BIT            0x80ULL
+#define  OPCODE_SSL_KEYBLOCK           0x10ULL
+#define  OPCODE_COPY                   0x20ULL
+#define  OPCODE_ENCRYPT                        0x40ULL
+#define  OPCODE_AUTH_MAC               0x41ULL
+
+#endif /* !(__ASSEMBLY__) */
+
+/* NCS v2.0 hypervisor interfaces */
+#define HV_NCS_QTYPE_MAU               0x01
+#define HV_NCS_QTYPE_CWQ               0x02
+
+/* ncs_qconf()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_NCS_QCONF
+ * ARG0:       Queue type (HV_NCS_QTYPE_{MAU,CWQ})
+ * ARG1:       Real address of queue, or handle for unconfigure
+ * ARG2:       Number of entries in queue, zero for unconfigure
+ * RET0:       status
+ * RET1:       queue handle
+ *
+ * Configure a queue in the stream processing unit.
+ *
+ * The real address given as the base must be 64-byte
+ * aligned.
+ *
+ * The queue size can range from a minimum of 2 to a maximum
+ * of 64.  The queue size must be a power of two.
+ *
+ * To unconfigure a queue, specify a length of zero and place
+ * the queue handle into ARG1.
+ *
+ * On configure success the hypervisor will set the FIRST, HEAD,
+ * and TAIL registers to the address of the first entry in the
+ * queue.  The LAST register will be set to point to the last
+ * entry in the queue.
+ */
+#define HV_FAST_NCS_QCONF              0x111
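The ncs_qconf() description above doubles as the setup and teardown protocol for an SPU queue. The fragment below is an illustrative sketch only, not part of this patch: it assumes a 64-byte-aligned queue buffer with a power-of-two entry count between 2 and 64, and treats a zero return from the sun4v_ncs_qconf() wrapper (declared at the end of this header) as success.

/* Illustrative sketch -- not part of the n2 driver. */
static int example_cwq_configure(void *q, unsigned long num_entries,
                                 unsigned long *qhandle)
{
        unsigned long hv_ret;

        /* Configure: pass the queue's real address and its entry count. */
        hv_ret = sun4v_ncs_qconf(HV_NCS_QTYPE_CWQ, __pa(q),
                                 num_entries, qhandle);
        return hv_ret ? -ENODEV : 0;
}

static void example_cwq_unconfigure(unsigned long qhandle)
{
        /* Unconfigure: ARG1 carries the queue handle, the entry count is zero. */
        sun4v_ncs_qconf(HV_NCS_QTYPE_CWQ, qhandle, 0, &qhandle);
}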
+
+/* ncs_qinfo()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_NCS_QINFO
+ * ARG0:       Queue handle
+ * RET0:       status
+ * RET1:       Queue type (HV_NCS_QTYPE_{MAU,CWQ})
+ * RET2:       Queue base address
+ * RET3:       Number of entries
+ */
+#define HV_FAST_NCS_QINFO              0x112
+
+/* ncs_gethead()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_NCS_GETHEAD
+ * ARG0:       Queue handle
+ * RET0:       status
+ * RET1:       queue head offset
+ */
+#define HV_FAST_NCS_GETHEAD            0x113
+
+/* ncs_gettail()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_NCS_GETTAIL
+ * ARG0:       Queue handle
+ * RET0:       status
+ * RET1:       queue tail offset
+ */
+#define HV_FAST_NCS_GETTAIL            0x114
+
+/* ncs_settail()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_NCS_SETTAIL
+ * ARG0:       Queue handle
+ * ARG1:       New tail offset
+ * RET0:       status
+ */
+#define HV_FAST_NCS_SETTAIL            0x115
+
+/* ncs_qhandle_to_devino()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_NCS_QHANDLE_TO_DEVINO
+ * ARG0:       Queue handle
+ * RET0:       status
+ * RET1:       devino
+ */
+#define HV_FAST_NCS_QHANDLE_TO_DEVINO  0x116
+
+/* ncs_sethead_marker()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_NCS_SETHEAD_MARKER
+ * ARG0:       Queue handle
+ * ARG1:       New head offset
+ * RET0:       status
+ */
+#define HV_FAST_NCS_SETHEAD_MARKER     0x117
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_ncs_qconf(unsigned long queue_type,
+                                    unsigned long queue_ra,
+                                    unsigned long num_entries,
+                                    unsigned long *qhandle);
+extern unsigned long sun4v_ncs_qinfo(unsigned long qhandle,
+                                    unsigned long *queue_type,
+                                    unsigned long *queue_ra,
+                                    unsigned long *num_entries);
+extern unsigned long sun4v_ncs_gethead(unsigned long qhandle,
+                                      unsigned long *head);
+extern unsigned long sun4v_ncs_gettail(unsigned long qhandle,
+                                      unsigned long *tail);
+extern unsigned long sun4v_ncs_settail(unsigned long qhandle,
+                                      unsigned long tail);
+extern unsigned long sun4v_ncs_qhandle_to_devino(unsigned long qhandle,
+                                                unsigned long *devino);
+extern unsigned long sun4v_ncs_sethead_marker(unsigned long qhandle,
+                                             unsigned long head);
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* _N2_CORE_H */
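Taken together, ncs_gethead()/ncs_settail() form the software/hardware handshake on a configured queue: the driver appends fixed-size entries at the tail and publishes them with a settail call, while the head offset reports how far the hardware has progressed. A hedged sketch of that flow follows; the queue base pointer, the caller-maintained tail offset and the wrap-around handling are assumptions for illustration, not code from this patch.

/* Illustration only: append one CWQ entry and publish it to the hardware.
 * 'q_base' is the queue buffer registered via ncs_qconf() and 'tail' is a
 * byte offset into it, maintained by the caller.
 */
static int example_cwq_submit(unsigned long qhandle, void *q_base,
                              unsigned long *tail,
                              const struct cwq_initial_entry *ent)
{
        unsigned long hv_ret, head;

        memcpy(q_base + *tail, ent, CWQ_ENTRY_SIZE);

        /* Advance the tail by one entry, wrapping at the end of the queue. */
        *tail += CWQ_ENTRY_SIZE;
        if (*tail == CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES)
                *tail = 0;

        hv_ret = sun4v_ncs_settail(qhandle, *tail);
        if (hv_ret)
                return -EIO;

        /* The head offset tells how far the hardware has consumed entries. */
        hv_ret = sun4v_ncs_gethead(qhandle, &head);

        return hv_ret ? -EIO : 0;
}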
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
new file mode 100644 (file)
index 0000000..8b03433
--- /dev/null
@@ -0,0 +1,1259 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for OMAP SHA1/MD5 HW acceleration.
+ *
+ * Copyright (c) 2010 Nokia Corporation
+ * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from old omap-sha1-md5.c driver.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/version.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+
+#include <plat/cpu.h>
+#include <plat/dma.h>
+#include <mach/irqs.h>
+
+#define SHA_REG_DIGEST(x)              (0x00 + ((x) * 0x04))
+#define SHA_REG_DIN(x)                 (0x1C + ((x) * 0x04))
+
+#define SHA1_MD5_BLOCK_SIZE            SHA1_BLOCK_SIZE
+#define MD5_DIGEST_SIZE                        16
+
+#define SHA_REG_DIGCNT                 0x14
+
+#define SHA_REG_CTRL                   0x18
+#define SHA_REG_CTRL_LENGTH            (0xFFFFFFFF << 5)
+#define SHA_REG_CTRL_CLOSE_HASH                (1 << 4)
+#define SHA_REG_CTRL_ALGO_CONST                (1 << 3)
+#define SHA_REG_CTRL_ALGO              (1 << 2)
+#define SHA_REG_CTRL_INPUT_READY       (1 << 1)
+#define SHA_REG_CTRL_OUTPUT_READY      (1 << 0)
+
+#define SHA_REG_REV                    0x5C
+#define SHA_REG_REV_MAJOR              0xF0
+#define SHA_REG_REV_MINOR              0x0F
+
+#define SHA_REG_MASK                   0x60
+#define SHA_REG_MASK_DMA_EN            (1 << 3)
+#define SHA_REG_MASK_IT_EN             (1 << 2)
+#define SHA_REG_MASK_SOFTRESET         (1 << 1)
+#define SHA_REG_AUTOIDLE               (1 << 0)
+
+#define SHA_REG_SYSSTATUS              0x64
+#define SHA_REG_SYSSTATUS_RESETDONE    (1 << 0)
+
+#define DEFAULT_TIMEOUT_INTERVAL       HZ
+
+#define FLAGS_FIRST            0x0001
+#define FLAGS_FINUP            0x0002
+#define FLAGS_FINAL            0x0004
+#define FLAGS_FAST             0x0008
+#define FLAGS_SHA1             0x0010
+#define FLAGS_DMA_ACTIVE       0x0020
+#define FLAGS_OUTPUT_READY     0x0040
+#define FLAGS_CLEAN            0x0080
+#define FLAGS_INIT             0x0100
+#define FLAGS_CPU              0x0200
+#define FLAGS_HMAC             0x0400
+
+/* 3rd byte */
+#define FLAGS_BUSY             16
+
+#define OP_UPDATE      1
+#define OP_FINAL       2
+
+struct omap_sham_dev;
+
+struct omap_sham_reqctx {
+       struct omap_sham_dev    *dd;
+       unsigned long           flags;
+       unsigned long           op;
+
+       size_t                  digcnt;
+       u8                      *buffer;
+       size_t                  bufcnt;
+       size_t                  buflen;
+       dma_addr_t              dma_addr;
+
+       /* walk state */
+       struct scatterlist      *sg;
+       unsigned int            offset; /* offset in current sg */
+       unsigned int            total;  /* total request */
+};
+
+struct omap_sham_hmac_ctx {
+       struct crypto_shash     *shash;
+       u8                      ipad[SHA1_MD5_BLOCK_SIZE];
+       u8                      opad[SHA1_MD5_BLOCK_SIZE];
+};
+
+struct omap_sham_ctx {
+       struct omap_sham_dev    *dd;
+
+       unsigned long           flags;
+
+       /* fallback stuff */
+       struct crypto_shash     *fallback;
+
+       struct omap_sham_hmac_ctx base[0];
+};
+
+#define OMAP_SHAM_QUEUE_LENGTH 1
+
+struct omap_sham_dev {
+       struct list_head        list;
+       unsigned long           phys_base;
+       struct device           *dev;
+       void __iomem            *io_base;
+       int                     irq;
+       struct clk              *iclk;
+       spinlock_t              lock;
+       int                     dma;
+       int                     dma_lch;
+       struct tasklet_struct   done_task;
+       struct tasklet_struct   queue_task;
+
+       unsigned long           flags;
+       struct crypto_queue     queue;
+       struct ahash_request    *req;
+};
+
+struct omap_sham_drv {
+       struct list_head        dev_list;
+       spinlock_t              lock;
+       unsigned long           flags;
+};
+
+static struct omap_sham_drv sham = {
+       .dev_list = LIST_HEAD_INIT(sham.dev_list),
+       .lock = __SPIN_LOCK_UNLOCKED(sham.lock),
+};
+
+static inline u32 omap_sham_read(struct omap_sham_dev *dd, u32 offset)
+{
+       return __raw_readl(dd->io_base + offset);
+}
+
+static inline void omap_sham_write(struct omap_sham_dev *dd,
+                                       u32 offset, u32 value)
+{
+       __raw_writel(value, dd->io_base + offset);
+}
+
+static inline void omap_sham_write_mask(struct omap_sham_dev *dd, u32 address,
+                                       u32 value, u32 mask)
+{
+       u32 val;
+
+       val = omap_sham_read(dd, address);
+       val &= ~mask;
+       val |= value;
+       omap_sham_write(dd, address, val);
+}
+
+static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit)
+{
+       unsigned long timeout = jiffies + DEFAULT_TIMEOUT_INTERVAL;
+
+       while (!(omap_sham_read(dd, offset) & bit)) {
+               if (time_is_before_jiffies(timeout))
+                       return -ETIMEDOUT;
+       }
+
+       return 0;
+}
+
+static void omap_sham_copy_hash(struct ahash_request *req, int out)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       u32 *hash = (u32 *)req->result;
+       int i;
+
+       if (likely(ctx->flags & FLAGS_SHA1)) {
+               /* SHA1 results are in big endian */
+               for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
+                       if (out)
+                               hash[i] = be32_to_cpu(omap_sham_read(ctx->dd,
+                                                       SHA_REG_DIGEST(i)));
+                       else
+                               omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
+                                                       cpu_to_be32(hash[i]));
+       } else {
+               /* MD5 results are in little endian */
+               for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++)
+                       if (out)
+                               hash[i] = le32_to_cpu(omap_sham_read(ctx->dd,
+                                                       SHA_REG_DIGEST(i)));
+                       else
+                               omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
+                                                       cpu_to_le32(hash[i]));
+       }
+}
+
+static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
+                                int final, int dma)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+       u32 val = length << 5, mask;
+
+       if (unlikely(!ctx->digcnt)) {
+
+               clk_enable(dd->iclk);
+
+               if (!(dd->flags & FLAGS_INIT)) {
+                       omap_sham_write_mask(dd, SHA_REG_MASK,
+                               SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET);
+
+                       if (omap_sham_wait(dd, SHA_REG_SYSSTATUS,
+                                               SHA_REG_SYSSTATUS_RESETDONE))
+                               return -ETIMEDOUT;
+
+                       dd->flags |= FLAGS_INIT;
+               }
+       } else {
+               omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt);
+       }
+
+       omap_sham_write_mask(dd, SHA_REG_MASK,
+               SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0),
+               SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN);
+       /*
+        * Setting ALGO_CONST only for the first iteration
+        * and CLOSE_HASH only for the last one.
+        */
+       if (ctx->flags & FLAGS_SHA1)
+               val |= SHA_REG_CTRL_ALGO;
+       if (!ctx->digcnt)
+               val |= SHA_REG_CTRL_ALGO_CONST;
+       if (final)
+               val |= SHA_REG_CTRL_CLOSE_HASH;
+
+       mask = SHA_REG_CTRL_ALGO_CONST | SHA_REG_CTRL_CLOSE_HASH |
+                       SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH;
+
+       omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask);
+
+       return 0;
+}
+
+static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
+                             size_t length, int final)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+       int err, count, len32;
+       const u32 *buffer = (const u32 *)buf;
+
+       dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+                                               ctx->digcnt, length, final);
+
+       err = omap_sham_write_ctrl(dd, length, final, 0);
+       if (err)
+               return err;
+
+       if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY))
+               return -ETIMEDOUT;
+
+       ctx->digcnt += length;
+
+       if (final)
+               ctx->flags |= FLAGS_FINAL; /* catch last interrupt */
+
+       len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+       for (count = 0; count < len32; count++)
+               omap_sham_write(dd, SHA_REG_DIN(count), buffer[count]);
+
+       return -EINPROGRESS;
+}
+
+static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
+                             size_t length, int final)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+       int err, len32;
+
+       dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
+                                               ctx->digcnt, length, final);
+
+       /* flush cache entries related to our page */
+       if (dma_addr == ctx->dma_addr)
+               dma_sync_single_for_device(dd->dev, dma_addr, length,
+                                          DMA_TO_DEVICE);
+
+       len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+       omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32,
+                       1, OMAP_DMA_SYNC_PACKET, dd->dma, OMAP_DMA_DST_SYNC);
+
+       omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC,
+                               dma_addr, 0, 0);
+
+       err = omap_sham_write_ctrl(dd, length, final, 1);
+       if (err)
+               return err;
+
+       ctx->digcnt += length;
+
+       if (final)
+               ctx->flags |= FLAGS_FINAL; /* catch last interrupt */
+
+       dd->flags |= FLAGS_DMA_ACTIVE;
+
+       omap_start_dma(dd->dma_lch);
+
+       return -EINPROGRESS;
+}
+
+static size_t omap_sham_append_buffer(struct omap_sham_reqctx *ctx,
+                               const u8 *data, size_t length)
+{
+       size_t count = min(length, ctx->buflen - ctx->bufcnt);
+
+       count = min(count, ctx->total);
+       if (count <= 0)
+               return 0;
+       memcpy(ctx->buffer + ctx->bufcnt, data, count);
+       ctx->bufcnt += count;
+
+       return count;
+}
+
+static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
+{
+       size_t count;
+
+       while (ctx->sg) {
+               count = omap_sham_append_buffer(ctx,
+                               sg_virt(ctx->sg) + ctx->offset,
+                               ctx->sg->length - ctx->offset);
+               if (!count)
+                       break;
+               ctx->offset += count;
+               ctx->total -= count;
+               if (ctx->offset == ctx->sg->length) {
+                       ctx->sg = sg_next(ctx->sg);
+                       if (ctx->sg)
+                               ctx->offset = 0;
+                       else
+                               ctx->total = 0;
+               }
+       }
+
+       return 0;
+}
+
+static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+       unsigned int final;
+       size_t count;
+
+       if (!ctx->total)
+               return 0;
+
+       omap_sham_append_sg(ctx);
+
+       final = (ctx->flags & FLAGS_FINUP) && !ctx->total;
+
+       dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
+                                        ctx->bufcnt, ctx->digcnt, final);
+
+       if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+               count = ctx->bufcnt;
+               ctx->bufcnt = 0;
+               return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final);
+       }
+
+       return 0;
+}
+
+static int omap_sham_update_dma_fast(struct omap_sham_dev *dd)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+       unsigned int length;
+
+       ctx->flags |= FLAGS_FAST;
+
+       length = min(ctx->total, sg_dma_len(ctx->sg));
+       ctx->total = length;
+
+       if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
+               dev_err(dd->dev, "dma_map_sg error\n");
+               return -EINVAL;
+       }
+
+       ctx->total -= length;
+
+       return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1);
+}
+
+static int omap_sham_update_cpu(struct omap_sham_dev *dd)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+       int bufcnt;
+
+       omap_sham_append_sg(ctx);
+       bufcnt = ctx->bufcnt;
+       ctx->bufcnt = 0;
+
+       return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, 1);
+}
+
+static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+
+       omap_stop_dma(dd->dma_lch);
+       if (ctx->flags & FLAGS_FAST)
+               dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+
+       return 0;
+}
+
+static void omap_sham_cleanup(struct ahash_request *req)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       struct omap_sham_dev *dd = ctx->dd;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dd->lock, flags);
+       if (ctx->flags & FLAGS_CLEAN) {
+               spin_unlock_irqrestore(&dd->lock, flags);
+               return;
+       }
+       ctx->flags |= FLAGS_CLEAN;
+       spin_unlock_irqrestore(&dd->lock, flags);
+
+       if (ctx->digcnt)
+               clk_disable(dd->iclk);
+
+       if (ctx->dma_addr)
+               dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
+                                DMA_TO_DEVICE);
+
+       if (ctx->buffer)
+               free_page((unsigned long)ctx->buffer);
+
+       dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt);
+}
+
+static int omap_sham_init(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       struct omap_sham_dev *dd = NULL, *tmp;
+
+       spin_lock_bh(&sham.lock);
+       if (!tctx->dd) {
+               list_for_each_entry(tmp, &sham.dev_list, list) {
+                       dd = tmp;
+                       break;
+               }
+               tctx->dd = dd;
+       } else {
+               dd = tctx->dd;
+       }
+       spin_unlock_bh(&sham.lock);
+
+       ctx->dd = dd;
+
+       ctx->flags = 0;
+
+       ctx->flags |= FLAGS_FIRST;
+
+       dev_dbg(dd->dev, "init: digest size: %d\n",
+               crypto_ahash_digestsize(tfm));
+
+       if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
+               ctx->flags |= FLAGS_SHA1;
+
+       ctx->bufcnt = 0;
+       ctx->digcnt = 0;
+
+       ctx->buflen = PAGE_SIZE;
+       ctx->buffer = (void *)__get_free_page(
+                               (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+                               GFP_KERNEL : GFP_ATOMIC);
+       if (!ctx->buffer)
+               return -ENOMEM;
+
+       ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
+                                       DMA_TO_DEVICE);
+       if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+               dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
+               free_page((unsigned long)ctx->buffer);
+               return -EINVAL;
+       }
+
+       if (tctx->flags & FLAGS_HMAC) {
+               struct omap_sham_hmac_ctx *bctx = tctx->base;
+
+               memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE);
+               ctx->bufcnt = SHA1_MD5_BLOCK_SIZE;
+               ctx->flags |= FLAGS_HMAC;
+       }
+
+       return 0;
+}
+
+static int omap_sham_update_req(struct omap_sham_dev *dd)
+{
+       struct ahash_request *req = dd->req;
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       int err;
+
+       dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
+                ctx->total, ctx->digcnt, (ctx->flags & FLAGS_FINUP) != 0);
+
+       if (ctx->flags & FLAGS_CPU)
+               err = omap_sham_update_cpu(dd);
+       else if (ctx->flags & FLAGS_FAST)
+               err = omap_sham_update_dma_fast(dd);
+       else
+               err = omap_sham_update_dma_slow(dd);
+
+       /* wait for DMA completion before we can take more data */
+       dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt);
+
+       return err;
+}
+
+static int omap_sham_final_req(struct omap_sham_dev *dd)
+{
+       struct ahash_request *req = dd->req;
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       int err = 0, use_dma = 1;
+
+       if (ctx->bufcnt <= 64)
+               /* faster to handle last block with cpu */
+               use_dma = 0;
+
+       if (use_dma)
+               err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1);
+       else
+               err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1);
+
+       ctx->bufcnt = 0;
+
+       if (err != -EINPROGRESS)
+               omap_sham_cleanup(req);
+
+       dev_dbg(dd->dev, "final_req: err: %d\n", err);
+
+       return err;
+}
+
+static int omap_sham_finish_req_hmac(struct ahash_request *req)
+{
+       struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+       struct omap_sham_hmac_ctx *bctx = tctx->base;
+       int bs = crypto_shash_blocksize(bctx->shash);
+       int ds = crypto_shash_digestsize(bctx->shash);
+       struct {
+               struct shash_desc shash;
+               char ctx[crypto_shash_descsize(bctx->shash)];
+       } desc;
+
+       desc.shash.tfm = bctx->shash;
+       desc.shash.flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */
+
+       return crypto_shash_init(&desc.shash) ?:
+              crypto_shash_update(&desc.shash, bctx->opad, bs) ?:
+              crypto_shash_finup(&desc.shash, req->result, ds, req->result);
+}
+
+static void omap_sham_finish_req(struct ahash_request *req, int err)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+
+       if (!err) {
+               omap_sham_copy_hash(ctx->dd->req, 1);
+               if (ctx->flags & FLAGS_HMAC)
+                       err = omap_sham_finish_req_hmac(req);
+       }
+
+       if (ctx->flags & FLAGS_FINAL)
+               omap_sham_cleanup(req);
+
+       clear_bit(FLAGS_BUSY, &ctx->dd->flags);
+
+       if (req->base.complete)
+               req->base.complete(&req->base, err);
+}
+
+static int omap_sham_handle_queue(struct omap_sham_dev *dd)
+{
+       struct crypto_async_request *async_req, *backlog;
+       struct omap_sham_reqctx *ctx;
+       struct ahash_request *req, *prev_req;
+       unsigned long flags;
+       int err = 0;
+
+       if (test_and_set_bit(FLAGS_BUSY, &dd->flags))
+               return 0;
+
+       spin_lock_irqsave(&dd->lock, flags);
+       backlog = crypto_get_backlog(&dd->queue);
+       async_req = crypto_dequeue_request(&dd->queue);
+       if (!async_req)
+               clear_bit(FLAGS_BUSY, &dd->flags);
+       spin_unlock_irqrestore(&dd->lock, flags);
+
+       if (!async_req)
+               return 0;
+
+       if (backlog)
+               backlog->complete(backlog, -EINPROGRESS);
+
+       req = ahash_request_cast(async_req);
+
+       prev_req = dd->req;
+       dd->req = req;
+
+       ctx = ahash_request_ctx(req);
+
+       dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
+                                               ctx->op, req->nbytes);
+
+       if (req != prev_req && ctx->digcnt)
+               /* request has changed - restore hash */
+               omap_sham_copy_hash(req, 0);
+
+       if (ctx->op == OP_UPDATE) {
+               err = omap_sham_update_req(dd);
+               if (err != -EINPROGRESS && (ctx->flags & FLAGS_FINUP))
+                       /* no final() after finup() */
+                       err = omap_sham_final_req(dd);
+       } else if (ctx->op == OP_FINAL) {
+               err = omap_sham_final_req(dd);
+       }
+
+       if (err != -EINPROGRESS) {
+               /* done_task will not finish it, so do it here */
+               omap_sham_finish_req(req, err);
+               tasklet_schedule(&dd->queue_task);
+       }
+
+       dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+       return err;
+}
+
+static int omap_sham_enqueue(struct ahash_request *req, unsigned int op)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+       struct omap_sham_dev *dd = tctx->dd;
+       unsigned long flags;
+       int err;
+
+       ctx->op = op;
+
+       spin_lock_irqsave(&dd->lock, flags);
+       err = ahash_enqueue_request(&dd->queue, req);
+       spin_unlock_irqrestore(&dd->lock, flags);
+
+       omap_sham_handle_queue(dd);
+
+       return err;
+}
+
+static int omap_sham_update(struct ahash_request *req)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+
+       if (!req->nbytes)
+               return 0;
+
+       ctx->total = req->nbytes;
+       ctx->sg = req->src;
+       ctx->offset = 0;
+
+       if (ctx->flags & FLAGS_FINUP) {
+               if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) {
+                       /*
+                        * OMAP HW accel works only with buffers >= 9 bytes;
+                        * we will switch to the software bypass in final(),
+                        * which sees the same request and data.
+                        */
+                       omap_sham_append_sg(ctx);
+                       return 0;
+               } else if (ctx->bufcnt + ctx->total <= 64) {
+                       ctx->flags |= FLAGS_CPU;
+               } else if (!ctx->bufcnt && sg_is_last(ctx->sg)) {
+                       /* maybe we can use the faster functions */
+                       int aligned = IS_ALIGNED((u32)ctx->sg->offset,
+                                                               sizeof(u32));
+
+                       if (aligned && (ctx->flags & FLAGS_FIRST))
+                               /* digest: first and final */
+                               ctx->flags |= FLAGS_FAST;
+
+                       ctx->flags &= ~FLAGS_FIRST;
+               }
+       } else if (ctx->bufcnt + ctx->total <= ctx->buflen) {
+               /* if not finup -> not fast */
+               omap_sham_append_sg(ctx);
+               return 0;
+       }
+
+       return omap_sham_enqueue(req, OP_UPDATE);
+}
+
+static int omap_sham_shash_digest(struct crypto_shash *shash, u32 flags,
+                                 const u8 *data, unsigned int len, u8 *out)
+{
+       struct {
+               struct shash_desc shash;
+               char ctx[crypto_shash_descsize(shash)];
+       } desc;
+
+       desc.shash.tfm = shash;
+       desc.shash.flags = flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       return crypto_shash_digest(&desc.shash, data, len, out);
+}
+
+static int omap_sham_final_shash(struct ahash_request *req)
+{
+       struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+
+       return omap_sham_shash_digest(tctx->fallback, req->base.flags,
+                                     ctx->buffer, ctx->bufcnt, req->result);
+}
+
+static int omap_sham_final(struct ahash_request *req)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       int err = 0;
+
+       ctx->flags |= FLAGS_FINUP;
+
+       /* OMAP HW accel works only with buffers >= 9 bytes */
+       /* HMAC is always >= 9 bytes because of the ipad */
+       if ((ctx->digcnt + ctx->bufcnt) < 9)
+               err = omap_sham_final_shash(req);
+       else if (ctx->bufcnt)
+               return omap_sham_enqueue(req, OP_FINAL);
+
+       omap_sham_cleanup(req);
+
+       return err;
+}
+
+static int omap_sham_finup(struct ahash_request *req)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       int err1, err2;
+
+       ctx->flags |= FLAGS_FINUP;
+
+       err1 = omap_sham_update(req);
+       if (err1 == -EINPROGRESS)
+               return err1;
+       /*
+        * final() always has to be called to clean up resources,
+        * even if update() failed, except in the -EINPROGRESS case
+        */
+       err2 = omap_sham_final(req);
+
+       return err1 ?: err2;
+}
+
+static int omap_sham_digest(struct ahash_request *req)
+{
+       return omap_sham_init(req) ?: omap_sham_finup(req);
+}
+
+static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key,
+                     unsigned int keylen)
+{
+       struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct omap_sham_hmac_ctx *bctx = tctx->base;
+       int bs = crypto_shash_blocksize(bctx->shash);
+       int ds = crypto_shash_digestsize(bctx->shash);
+       int err, i;
+       err = crypto_shash_setkey(tctx->fallback, key, keylen);
+       if (err)
+               return err;
+
+       if (keylen > bs) {
+               err = omap_sham_shash_digest(bctx->shash,
+                               crypto_shash_get_flags(bctx->shash),
+                               key, keylen, bctx->ipad);
+               if (err)
+                       return err;
+               keylen = ds;
+       } else {
+               memcpy(bctx->ipad, key, keylen);
+       }
+
+       memset(bctx->ipad + keylen, 0, bs - keylen);
+       memcpy(bctx->opad, bctx->ipad, bs);
+
+       for (i = 0; i < bs; i++) {
+               bctx->ipad[i] ^= 0x36;
+               bctx->opad[i] ^= 0x5c;
+       }
+
+       return err;
+}
+
+static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+       struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
+       const char *alg_name = crypto_tfm_alg_name(tfm);
+
+       /* Allocate a fallback and abort if it failed. */
+       tctx->fallback = crypto_alloc_shash(alg_name, 0,
+                                           CRYPTO_ALG_NEED_FALLBACK);
+       if (IS_ERR(tctx->fallback)) {
+               pr_err("omap-sham: fallback driver '%s' "
+                               "could not be loaded.\n", alg_name);
+               return PTR_ERR(tctx->fallback);
+       }
+
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct omap_sham_reqctx));
+
+       if (alg_base) {
+               struct omap_sham_hmac_ctx *bctx = tctx->base;
+               tctx->flags |= FLAGS_HMAC;
+               bctx->shash = crypto_alloc_shash(alg_base, 0,
+                                               CRYPTO_ALG_NEED_FALLBACK);
+               if (IS_ERR(bctx->shash)) {
+                       pr_err("omap-sham: base driver '%s' "
+                                       "could not be loaded.\n", alg_base);
+                       crypto_free_shash(tctx->fallback);
+                       return PTR_ERR(bctx->shash);
+               }
+
+       }
+
+       return 0;
+}
+
+static int omap_sham_cra_init(struct crypto_tfm *tfm)
+{
+       return omap_sham_cra_init_alg(tfm, NULL);
+}
+
+static int omap_sham_cra_sha1_init(struct crypto_tfm *tfm)
+{
+       return omap_sham_cra_init_alg(tfm, "sha1");
+}
+
+static int omap_sham_cra_md5_init(struct crypto_tfm *tfm)
+{
+       return omap_sham_cra_init_alg(tfm, "md5");
+}
+
+static void omap_sham_cra_exit(struct crypto_tfm *tfm)
+{
+       struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
+
+       crypto_free_shash(tctx->fallback);
+       tctx->fallback = NULL;
+
+       if (tctx->flags & FLAGS_HMAC) {
+               struct omap_sham_hmac_ctx *bctx = tctx->base;
+               crypto_free_shash(bctx->shash);
+       }
+}
+
+static struct ahash_alg algs[] = {
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .halg.digestsize        = SHA1_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "sha1",
+               .cra_driver_name        = "omap-sha1",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA1_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx),
+               .cra_alignmask          = 0,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+},
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .halg.digestsize        = MD5_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "md5",
+               .cra_driver_name        = "omap-md5",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA1_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx),
+               .cra_alignmask          = 0,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+},
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .setkey         = omap_sham_setkey,
+       .halg.digestsize        = SHA1_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "hmac(sha1)",
+               .cra_driver_name        = "omap-hmac-sha1",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA1_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx) +
+                                       sizeof(struct omap_sham_hmac_ctx),
+               .cra_alignmask          = 0,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_sha1_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+},
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .setkey         = omap_sham_setkey,
+       .halg.digestsize        = MD5_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "hmac(md5)",
+               .cra_driver_name        = "omap-hmac-md5",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA1_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx) +
+                                       sizeof(struct omap_sham_hmac_ctx),
+               .cra_alignmask          = 0,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_md5_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+}
+};
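The table above only registers the algorithms; callers reach them through the generic ahash API, which picks the highest-priority implementation for a given cra_name. The sketch below is illustrative and not part of this patch: it shows how a kernel user might compute a SHA-1 digest asynchronously, assuming 'data' lives in DMA-able (non-stack) memory and using a completion to wait for the async callback.

/* Illustration only -- not part of the omap-sham patch. */
#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <linux/completion.h>
#include <linux/err.h>

struct example_wait {
        struct completion done;
        int err;
};

static void example_done(struct crypto_async_request *req, int err)
{
        struct example_wait *w = req->data;

        if (err == -EINPROGRESS)
                return;         /* request was backlogged, keep waiting */
        w->err = err;
        complete(&w->done);
}

static int example_sha1_digest(const u8 *data, unsigned int len, u8 *out)
{
        struct crypto_ahash *tfm;
        struct ahash_request *req;
        struct scatterlist sg;
        struct example_wait w;
        int err;

        tfm = crypto_alloc_ahash("sha1", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        req = ahash_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                crypto_free_ahash(tfm);
                return -ENOMEM;
        }

        init_completion(&w.done);
        sg_init_one(&sg, data, len);    /* 'data' must not be on the stack */
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   example_done, &w);
        ahash_request_set_crypt(req, &sg, out, len);

        err = crypto_ahash_digest(req);
        if (err == -EINPROGRESS || err == -EBUSY) {
                wait_for_completion(&w.done);
                err = w.err;
        }

        ahash_request_free(req);
        crypto_free_ahash(tfm);
        return err;
}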
+
+static void omap_sham_done_task(unsigned long data)
+{
+       struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
+       struct ahash_request *req = dd->req;
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       int ready = 1;
+
+       if (ctx->flags & FLAGS_OUTPUT_READY) {
+               ctx->flags &= ~FLAGS_OUTPUT_READY;
+               ready = 1;
+       }
+
+       if (dd->flags & FLAGS_DMA_ACTIVE) {
+               dd->flags &= ~FLAGS_DMA_ACTIVE;
+               omap_sham_update_dma_stop(dd);
+               omap_sham_update_dma_slow(dd);
+       }
+
+       if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) {
+               dev_dbg(dd->dev, "update done\n");
+               /* finish current request */
+               omap_sham_finish_req(req, 0);
+               /* start new request */
+               omap_sham_handle_queue(dd);
+       }
+}
+
+static void omap_sham_queue_task(unsigned long data)
+{
+       struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
+
+       omap_sham_handle_queue(dd);
+}
+
+static irqreturn_t omap_sham_irq(int irq, void *dev_id)
+{
+       struct omap_sham_dev *dd = dev_id;
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+
+       if (!ctx) {
+               dev_err(dd->dev, "unknown interrupt.\n");
+               return IRQ_HANDLED;
+       }
+
+       if (unlikely(ctx->flags & FLAGS_FINAL))
+               /* final -> allow device to go to power-saving mode */
+               omap_sham_write_mask(dd, SHA_REG_CTRL, 0, SHA_REG_CTRL_LENGTH);
+
+       omap_sham_write_mask(dd, SHA_REG_CTRL, SHA_REG_CTRL_OUTPUT_READY,
+                                SHA_REG_CTRL_OUTPUT_READY);
+       omap_sham_read(dd, SHA_REG_CTRL);
+
+       ctx->flags |= FLAGS_OUTPUT_READY;
+       tasklet_schedule(&dd->done_task);
+
+       return IRQ_HANDLED;
+}
+
+static void omap_sham_dma_callback(int lch, u16 ch_status, void *data)
+{
+       struct omap_sham_dev *dd = data;
+
+       if (likely(lch == dd->dma_lch))
+               tasklet_schedule(&dd->done_task);
+}
+
+static int omap_sham_dma_init(struct omap_sham_dev *dd)
+{
+       int err;
+
+       dd->dma_lch = -1;
+
+       err = omap_request_dma(dd->dma, dev_name(dd->dev),
+                       omap_sham_dma_callback, dd, &dd->dma_lch);
+       if (err) {
+               dev_err(dd->dev, "Unable to request DMA channel\n");
+               return err;
+       }
+       omap_set_dma_dest_params(dd->dma_lch, 0,
+                       OMAP_DMA_AMODE_CONSTANT,
+                       dd->phys_base + SHA_REG_DIN(0), 0, 16);
+
+       omap_set_dma_dest_burst_mode(dd->dma_lch,
+                       OMAP_DMA_DATA_BURST_16);
+
+       return 0;
+}
+
+static void omap_sham_dma_cleanup(struct omap_sham_dev *dd)
+{
+       if (dd->dma_lch >= 0) {
+               omap_free_dma(dd->dma_lch);
+               dd->dma_lch = -1;
+       }
+}
+
+static int __devinit omap_sham_probe(struct platform_device *pdev)
+{
+       struct omap_sham_dev *dd;
+       struct device *dev = &pdev->dev;
+       struct resource *res;
+       int err, i, j;
+
+       dd = kzalloc(sizeof(struct omap_sham_dev), GFP_KERNEL);
+       if (dd == NULL) {
+               dev_err(dev, "unable to alloc data struct.\n");
+               err = -ENOMEM;
+               goto data_err;
+       }
+       dd->dev = dev;
+       platform_set_drvdata(pdev, dd);
+
+       INIT_LIST_HEAD(&dd->list);
+       spin_lock_init(&dd->lock);
+       tasklet_init(&dd->done_task, omap_sham_done_task, (unsigned long)dd);
+       tasklet_init(&dd->queue_task, omap_sham_queue_task, (unsigned long)dd);
+       crypto_init_queue(&dd->queue, OMAP_SHAM_QUEUE_LENGTH);
+
+       dd->irq = -1;
+
+       /* Get the base address */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(dev, "no MEM resource info\n");
+               err = -ENODEV;
+               goto res_err;
+       }
+       dd->phys_base = res->start;
+
+       /* Get the DMA */
+       res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+       if (!res) {
+               dev_err(dev, "no DMA resource info\n");
+               err = -ENODEV;
+               goto res_err;
+       }
+       dd->dma = res->start;
+
+       /* Get the IRQ */
+       dd->irq = platform_get_irq(pdev, 0);
+       if (dd->irq < 0) {
+               dev_err(dev, "no IRQ resource info\n");
+               err = dd->irq;
+               goto res_err;
+       }
+
+       err = request_irq(dd->irq, omap_sham_irq,
+                       IRQF_TRIGGER_LOW, dev_name(dev), dd);
+       if (err) {
+               dev_err(dev, "unable to request irq.\n");
+               goto res_err;
+       }
+
+       err = omap_sham_dma_init(dd);
+       if (err)
+               goto dma_err;
+
+       /* Initializing the clock */
+       dd->iclk = clk_get(dev, "ick");
+       if (!dd->iclk) {
+               dev_err(dev, "clock initialization failed.\n");
+               err = -ENODEV;
+               goto clk_err;
+       }
+
+       dd->io_base = ioremap(dd->phys_base, SZ_4K);
+       if (!dd->io_base) {
+               dev_err(dev, "can't ioremap\n");
+               err = -ENOMEM;
+               goto io_err;
+       }
+
+       clk_enable(dd->iclk);
+       dev_info(dev, "hw accel on OMAP rev %u.%u\n",
+               (omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MAJOR) >> 4,
+               omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MINOR);
+       clk_disable(dd->iclk);
+
+       spin_lock(&sham.lock);
+       list_add_tail(&dd->list, &sham.dev_list);
+       spin_unlock(&sham.lock);
+
+       for (i = 0; i < ARRAY_SIZE(algs); i++) {
+               err = crypto_register_ahash(&algs[i]);
+               if (err)
+                       goto err_algs;
+       }
+
+       return 0;
+
+err_algs:
+       for (j = 0; j < i; j++)
+               crypto_unregister_ahash(&algs[j]);
+       iounmap(dd->io_base);
+io_err:
+       clk_put(dd->iclk);
+clk_err:
+       omap_sham_dma_cleanup(dd);
+dma_err:
+       if (dd->irq >= 0)
+               free_irq(dd->irq, dd);
+res_err:
+       kfree(dd);
+       dd = NULL;
+data_err:
+       dev_err(dev, "initialization failed.\n");
+
+       return err;
+}
+
+static int __devexit omap_sham_remove(struct platform_device *pdev)
+{
+       struct omap_sham_dev *dd;
+       int i;
+
+       dd = platform_get_drvdata(pdev);
+       if (!dd)
+               return -ENODEV;
+       spin_lock(&sham.lock);
+       list_del(&dd->list);
+       spin_unlock(&sham.lock);
+       for (i = 0; i < ARRAY_SIZE(algs); i++)
+               crypto_unregister_ahash(&algs[i]);
+       tasklet_kill(&dd->done_task);
+       tasklet_kill(&dd->queue_task);
+       iounmap(dd->io_base);
+       clk_put(dd->iclk);
+       omap_sham_dma_cleanup(dd);
+       if (dd->irq >= 0)
+               free_irq(dd->irq, dd);
+       kfree(dd);
+       dd = NULL;
+
+       return 0;
+}
+
+static struct platform_driver omap_sham_driver = {
+       .probe  = omap_sham_probe,
+       .remove = omap_sham_remove,
+       .driver = {
+               .name   = "omap-sham",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init omap_sham_mod_init(void)
+{
+       pr_info("loading %s driver\n", "omap-sham");
+
+       if (!cpu_class_is_omap2() ||
+               omap_type() != OMAP2_DEVICE_TYPE_SEC) {
+               pr_err("Unsupported cpu\n");
+               return -ENODEV;
+       }
+
+       return platform_driver_register(&omap_sham_driver);
+}
+
+static void __exit omap_sham_mod_exit(void)
+{
+       platform_driver_unregister(&omap_sham_driver);
+}
+
+module_init(omap_sham_mod_init);
+module_exit(omap_sham_mod_exit);
+
+MODULE_DESCRIPTION("OMAP SHA1/MD5 hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Dmitry Kasatkin");
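
For context, omap_sham_probe() above expects the board code to register an "omap-sham" platform device carrying one MEM, one DMA and one IRQ resource. A minimal, hypothetical board-file sketch (the numeric base address, DMA line and IRQ below are placeholders, not real OMAP values; needs <linux/platform_device.h>):

/* Hypothetical board-file sketch; all resource values are placeholders. */
static struct resource omap_sham_resources[] = {
	{
		.start	= 0x480a4000,		/* placeholder register base */
		.end	= 0x480a4fff,
		.flags	= IORESOURCE_MEM,
	},
	{
		.start	= 13,			/* placeholder DMA request line */
		.end	= 13,
		.flags	= IORESOURCE_DMA,
	},
	{
		.start	= 49,			/* placeholder IRQ number */
		.end	= 49,
		.flags	= IORESOURCE_IRQ,
	},
};

static struct platform_device omap_sham_device = {
	.name		= "omap-sham",		/* must match the driver name above */
	.id		= -1,
	.resource	= omap_sham_resources,
	.num_resources	= ARRAY_SIZE(omap_sham_resources),
};

/* registered from board init code: platform_device_register(&omap_sham_device); */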
index dc558a0..6a0f59d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * talitos - Freescale Integrated Security Engine (SEC) device driver
  *
- * Copyright (c) 2008 Freescale Semiconductor, Inc.
+ * Copyright (c) 2008-2010 Freescale Semiconductor, Inc.
  *
  * Scatterlist Crypto API glue code copied from files with the following:
  * Copyright (c) 2006-2007 Herbert Xu <herbert@gondor.apana.org.au>
 #include <crypto/aes.h>
 #include <crypto/des.h>
 #include <crypto/sha.h>
+#include <crypto/md5.h>
 #include <crypto/aead.h>
 #include <crypto/authenc.h>
 #include <crypto/skcipher.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 
 #include "talitos.h"
@@ -65,6 +68,13 @@ struct talitos_ptr {
        __be32 ptr;     /* address */
 };
 
+static const struct talitos_ptr zero_entry = {
+       .len = 0,
+       .j_extent = 0,
+       .eptr = 0,
+       .ptr = 0
+};
+
 /* descriptor */
 struct talitos_desc {
        __be32 hdr;                     /* header high bits */
@@ -146,6 +156,7 @@ struct talitos_private {
 /* .features flag */
 #define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001
 #define TALITOS_FTR_HW_AUTH_CHECK 0x00000002
+#define TALITOS_FTR_SHA224_HWINIT 0x00000004
 
 static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr)
 {
@@ -692,7 +703,7 @@ static void talitos_unregister_rng(struct device *dev)
 #define TALITOS_MAX_KEY_SIZE           64
 #define TALITOS_MAX_IV_LENGTH          16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
 
-#define MD5_DIGEST_SIZE   16
+#define MD5_BLOCK_SIZE    64
 
 struct talitos_ctx {
        struct device *dev;
@@ -705,6 +716,23 @@ struct talitos_ctx {
        unsigned int authsize;
 };
 
+#define HASH_MAX_BLOCK_SIZE            SHA512_BLOCK_SIZE
+#define TALITOS_MDEU_MAX_CONTEXT_SIZE  TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512
+
+struct talitos_ahash_req_ctx {
+       u64 count;
+       u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
+       unsigned int hw_context_size;
+       u8 buf[HASH_MAX_BLOCK_SIZE];
+       u8 bufnext[HASH_MAX_BLOCK_SIZE];
+       unsigned int swinit;
+       unsigned int first;
+       unsigned int last;
+       unsigned int to_hash_later;
+       struct scatterlist bufsl[2];
+       struct scatterlist *psrc;
+};
+
 static int aead_setauthsize(struct crypto_aead *authenc,
                            unsigned int authsize)
 {
@@ -821,10 +849,14 @@ static void talitos_sg_unmap(struct device *dev,
                else
                        dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
 
-               if (edesc->dst_is_chained)
-                       talitos_unmap_sg_chain(dev, dst, DMA_FROM_DEVICE);
-               else
-                       dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
+               if (dst) {
+                       if (edesc->dst_is_chained)
+                               talitos_unmap_sg_chain(dev, dst,
+                                                      DMA_FROM_DEVICE);
+                       else
+                               dma_unmap_sg(dev, dst, dst_nents,
+                                            DMA_FROM_DEVICE);
+               }
        } else
                if (edesc->src_is_chained)
                        talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL);
@@ -1114,12 +1146,67 @@ static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained)
        return sg_nents;
 }
 
+/**
+ * sg_copy_end_to_buffer - Copy end data from SG list to a linear buffer
+ * @sgl:                The SG list
+ * @nents:              Number of SG entries
+ * @buf:                Where to copy to
+ * @buflen:             The number of bytes to copy
+ * @skip:               The number of bytes to skip before copying.
+ *                       Note: skip + buflen should equal SG total size.
+ *
+ * Returns the number of copied bytes.
+ *
+ */
+static size_t sg_copy_end_to_buffer(struct scatterlist *sgl, unsigned int nents,
+                                   void *buf, size_t buflen, unsigned int skip)
+{
+       unsigned int offset = 0;
+       unsigned int boffset = 0;
+       struct sg_mapping_iter miter;
+       unsigned long flags;
+       unsigned int sg_flags = SG_MITER_ATOMIC;
+       size_t total_buffer = buflen + skip;
+
+       sg_flags |= SG_MITER_FROM_SG;
+
+       sg_miter_start(&miter, sgl, nents, sg_flags);
+
+       local_irq_save(flags);
+
+       while (sg_miter_next(&miter) && offset < total_buffer) {
+               unsigned int len;
+               unsigned int ignore;
+
+               if ((offset + miter.length) > skip) {
+                       if (offset < skip) {
+                               /* Copy part of this segment */
+                               ignore = skip - offset;
+                               len = miter.length - ignore;
+                               memcpy(buf + boffset, miter.addr + ignore, len);
+                       } else {
+                               /* Copy all of this segment */
+                               len = miter.length;
+                               memcpy(buf + boffset, miter.addr, len);
+                       }
+                       boffset += len;
+               }
+               offset += miter.length;
+       }
+
+       sg_miter_stop(&miter);
+
+       local_irq_restore(flags);
+       return boffset;
+}
+
 /*
  * allocate and map the extended descriptor
  */
 static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
                                                 struct scatterlist *src,
                                                 struct scatterlist *dst,
+                                                int hash_result,
                                                 unsigned int cryptlen,
                                                 unsigned int authsize,
                                                 int icv_stashing,
@@ -1139,11 +1226,16 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
        src_nents = sg_count(src, cryptlen + authsize, &src_chained);
        src_nents = (src_nents == 1) ? 0 : src_nents;
 
-       if (dst == src) {
-               dst_nents = src_nents;
+       if (hash_result) {
+               dst_nents = 0;
        } else {
-               dst_nents = sg_count(dst, cryptlen + authsize, &dst_chained);
-               dst_nents = (dst_nents == 1) ? 0 : dst_nents;
+               if (dst == src) {
+                       dst_nents = src_nents;
+               } else {
+                       dst_nents = sg_count(dst, cryptlen + authsize,
+                                            &dst_chained);
+                       dst_nents = (dst_nents == 1) ? 0 : dst_nents;
+               }
        }
 
        /*
@@ -1172,8 +1264,10 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
        edesc->src_is_chained = src_chained;
        edesc->dst_is_chained = dst_chained;
        edesc->dma_len = dma_len;
-       edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
-                                            edesc->dma_len, DMA_BIDIRECTIONAL);
+       if (dma_len)
+               edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
+                                                    edesc->dma_len,
+                                                    DMA_BIDIRECTIONAL);
 
        return edesc;
 }
@@ -1184,7 +1278,7 @@ static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq,
        struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
        struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 
-       return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
+       return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, 0,
                                   areq->cryptlen, ctx->authsize, icv_stashing,
                                   areq->base.flags);
 }
@@ -1441,8 +1535,8 @@ static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *
        struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
        struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
 
-       return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, areq->nbytes,
-                                  0, 0, areq->base.flags);
+       return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, 0,
+                                  areq->nbytes, 0, 0, areq->base.flags);
 }
 
 static int ablkcipher_encrypt(struct ablkcipher_request *areq)
@@ -1478,15 +1572,329 @@ static int ablkcipher_decrypt(struct ablkcipher_request *areq)
        return common_nonsnoop(edesc, areq, NULL, ablkcipher_done);
 }
 
+static void common_nonsnoop_hash_unmap(struct device *dev,
+                                      struct talitos_edesc *edesc,
+                                      struct ahash_request *areq)
+{
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+
+       /* When using hashctx-in, must unmap it. */
+       if (edesc->desc.ptr[1].len)
+               unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1],
+                                        DMA_TO_DEVICE);
+
+       if (edesc->desc.ptr[2].len)
+               unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2],
+                                        DMA_TO_DEVICE);
+
+       talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL);
+
+       if (edesc->dma_len)
+               dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
+                                DMA_BIDIRECTIONAL);
+
+}
+
+static void ahash_done(struct device *dev,
+                      struct talitos_desc *desc, void *context,
+                      int err)
+{
+       struct ahash_request *areq = context;
+       struct talitos_edesc *edesc =
+                container_of(desc, struct talitos_edesc, desc);
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       if (!req_ctx->last && req_ctx->to_hash_later) {
+               /* Position any partial block for next update/final/finup */
+               memcpy(req_ctx->buf, req_ctx->bufnext, req_ctx->to_hash_later);
+       }
+       common_nonsnoop_hash_unmap(dev, edesc, areq);
+
+       kfree(edesc);
+
+       areq->base.complete(&areq->base, err);
+}
+
+static int common_nonsnoop_hash(struct talitos_edesc *edesc,
+                               struct ahash_request *areq, unsigned int length,
+                               void (*callback) (struct device *dev,
+                                                 struct talitos_desc *desc,
+                                                 void *context, int error))
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+       struct device *dev = ctx->dev;
+       struct talitos_desc *desc = &edesc->desc;
+       int sg_count, ret;
+
+       /* first DWORD empty */
+       desc->ptr[0] = zero_entry;
+
+       /* hash context in */
+       if (!req_ctx->first || req_ctx->swinit) {
+               map_single_talitos_ptr(dev, &desc->ptr[1],
+                                      req_ctx->hw_context_size,
+                                      (char *)req_ctx->hw_context, 0,
+                                      DMA_TO_DEVICE);
+               req_ctx->swinit = 0;
+       } else {
+               desc->ptr[1] = zero_entry;
+               /* Indicate next op is not the first. */
+               req_ctx->first = 0;
+       }
+
+       /* HMAC key */
+       if (ctx->keylen)
+               map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
+                                      (char *)&ctx->key, 0, DMA_TO_DEVICE);
+       else
+               desc->ptr[2] = zero_entry;
+
+       /*
+        * data in
+        */
+       desc->ptr[3].len = cpu_to_be16(length);
+       desc->ptr[3].j_extent = 0;
+
+       sg_count = talitos_map_sg(dev, req_ctx->psrc,
+                                 edesc->src_nents ? : 1,
+                                 DMA_TO_DEVICE,
+                                 edesc->src_is_chained);
+
+       if (sg_count == 1) {
+               to_talitos_ptr(&desc->ptr[3], sg_dma_address(req_ctx->psrc));
+       } else {
+               sg_count = sg_to_link_tbl(req_ctx->psrc, sg_count, length,
+                                         &edesc->link_tbl[0]);
+               if (sg_count > 1) {
+                       desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP;
+                       to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl);
+                       dma_sync_single_for_device(ctx->dev,
+                                                  edesc->dma_link_tbl,
+                                                  edesc->dma_len,
+                                                  DMA_BIDIRECTIONAL);
+               } else {
+                       /* Only one segment now, so no link tbl needed */
+                       to_talitos_ptr(&desc->ptr[3],
+                                      sg_dma_address(req_ctx->psrc));
+               }
+       }
+
+       /* fifth DWORD empty */
+       desc->ptr[4] = zero_entry;
+
+       /* hash/HMAC out -or- hash context out */
+       if (req_ctx->last)
+               map_single_talitos_ptr(dev, &desc->ptr[5],
+                                      crypto_ahash_digestsize(tfm),
+                                      areq->result, 0, DMA_FROM_DEVICE);
+       else
+               map_single_talitos_ptr(dev, &desc->ptr[5],
+                                      req_ctx->hw_context_size,
+                                      req_ctx->hw_context, 0, DMA_FROM_DEVICE);
+
+       /* last DWORD empty */
+       desc->ptr[6] = zero_entry;
+
+       ret = talitos_submit(dev, desc, callback, areq);
+       if (ret != -EINPROGRESS) {
+               common_nonsnoop_hash_unmap(dev, edesc, areq);
+               kfree(edesc);
+       }
+       return ret;
+}
+
+static struct talitos_edesc *ahash_edesc_alloc(struct ahash_request *areq,
+                                              unsigned int nbytes)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       return talitos_edesc_alloc(ctx->dev, req_ctx->psrc, NULL, 1,
+                                  nbytes, 0, 0, areq->base.flags);
+}
+
+static int ahash_init(struct ahash_request *areq)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       /* Initialize the context */
+       req_ctx->count = 0;
+       req_ctx->first = 1; /* first indicates h/w must init its context */
+       req_ctx->swinit = 0; /* assume h/w init of context */
+       req_ctx->hw_context_size =
+               (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
+                       ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
+                       : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
+
+       return 0;
+}
+
+/*
+ * on h/w without explicit sha224 support, we initialize h/w context
+ * manually with sha224 constants, and tell it to run sha256.
+ */
+static int ahash_init_sha224_swinit(struct ahash_request *areq)
+{
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       ahash_init(areq);
+       req_ctx->swinit = 1; /* prevent h/w initting context with sha256 values */
+
+       req_ctx->hw_context[0] = cpu_to_be32(SHA224_H0);
+       req_ctx->hw_context[1] = cpu_to_be32(SHA224_H1);
+       req_ctx->hw_context[2] = cpu_to_be32(SHA224_H2);
+       req_ctx->hw_context[3] = cpu_to_be32(SHA224_H3);
+       req_ctx->hw_context[4] = cpu_to_be32(SHA224_H4);
+       req_ctx->hw_context[5] = cpu_to_be32(SHA224_H5);
+       req_ctx->hw_context[6] = cpu_to_be32(SHA224_H6);
+       req_ctx->hw_context[7] = cpu_to_be32(SHA224_H7);
+
+       /* init 64-bit count */
+       req_ctx->hw_context[8] = 0;
+       req_ctx->hw_context[9] = 0;
+
+       return 0;
+}
+
+static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+       struct talitos_edesc *edesc;
+       unsigned int blocksize =
+                       crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+       unsigned int nbytes_to_hash;
+       unsigned int to_hash_later;
+       unsigned int index;
+       int chained;
+
+       index = req_ctx->count & (blocksize - 1);
+       req_ctx->count += nbytes;
+
+       if (!req_ctx->last && (index + nbytes) < blocksize) {
+               /* Buffer the partial block */
+               sg_copy_to_buffer(areq->src,
+                                 sg_count(areq->src, nbytes, &chained),
+                                 req_ctx->buf + index, nbytes);
+               return 0;
+       }
+
+       if (index) {
+               /* partial block from previous update; chain it in. */
+               sg_init_table(req_ctx->bufsl, (nbytes) ? 2 : 1);
+               sg_set_buf(req_ctx->bufsl, req_ctx->buf, index);
+               if (nbytes)
+                       scatterwalk_sg_chain(req_ctx->bufsl, 2,
+                                            areq->src);
+               req_ctx->psrc = req_ctx->bufsl;
+       } else {
+               req_ctx->psrc = areq->src;
+       }
+       nbytes_to_hash = index + nbytes;
+       if (!req_ctx->last) {
+               to_hash_later = (nbytes_to_hash & (blocksize - 1));
+               if (to_hash_later) {
+                       int nents;
+                       /* Must copy to_hash_later bytes from the end
+                        * to bufnext (a partial block) for later.
+                        */
+                       nents = sg_count(areq->src, nbytes, &chained);
+                       sg_copy_end_to_buffer(areq->src, nents,
+                                             req_ctx->bufnext,
+                                             to_hash_later,
+                                             nbytes - to_hash_later);
+
+                       /* Adjust count for what will be hashed now */
+                       nbytes_to_hash -= to_hash_later;
+               }
+               req_ctx->to_hash_later = to_hash_later;
+       }
+
+       /* allocate extended descriptor */
+       edesc = ahash_edesc_alloc(areq, nbytes_to_hash);
+       if (IS_ERR(edesc))
+               return PTR_ERR(edesc);
+
+       edesc->desc.hdr = ctx->desc_hdr_template;
+
+       /* On last one, request SEC to pad; otherwise continue */
+       if (req_ctx->last)
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD;
+       else
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT;
+
+       /* request SEC to INIT hash. */
+       if (req_ctx->first && !req_ctx->swinit)
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT;
+
+       /* When the tfm context has a keylen, it's an HMAC.
+        * A first or last (ie. not middle) descriptor must request HMAC.
+        */
+       if (ctx->keylen && (req_ctx->first || req_ctx->last))
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
+
+       return common_nonsnoop_hash(edesc, areq, nbytes_to_hash,
+                                   ahash_done);
+}
+
+static int ahash_update(struct ahash_request *areq)
+{
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       req_ctx->last = 0;
+
+       return ahash_process_req(areq, areq->nbytes);
+}
+
+static int ahash_final(struct ahash_request *areq)
+{
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       req_ctx->last = 1;
+
+       return ahash_process_req(areq, 0);
+}
+
+static int ahash_finup(struct ahash_request *areq)
+{
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+       req_ctx->last = 1;
+
+       return ahash_process_req(areq, areq->nbytes);
+}
+
+static int ahash_digest(struct ahash_request *areq)
+{
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+       struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+
+       ahash->init(areq);
+       req_ctx->last = 1;
+
+       return ahash_process_req(areq, areq->nbytes);
+}
+
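The ahash entry points above are reached through the generic crypto ahash API. A hedged caller-side sketch (needs <crypto/hash.h> and <linux/scatterlist.h>; my_done() and my_sha256_digest() are hypothetical names, error handling and teardown after completion are trimmed):

/* Hedged caller-side sketch; my_done() is a hypothetical completion callback. */
static void my_done(struct crypto_async_request *req, int err)
{
	/* signal whoever is waiting on the request; details omitted */
}

static int my_sha256_digest(const void *data, unsigned int len, u8 *digest)
{
	struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, data, len);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, my_done, NULL);
	ahash_request_set_crypt(req, &sg, digest, len);

	err = crypto_ahash_digest(req);	/* ends up in ahash_digest() above */
	/* -EINPROGRESS: my_done() fires once the descriptor completes;
	 * request/tfm teardown after completion is omitted here. */
	return err;
}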
 struct talitos_alg_template {
-       struct crypto_alg alg;
+       u32 type;
+       union {
+               struct crypto_alg crypto;
+               struct ahash_alg hash;
+       } alg;
        __be32 desc_hdr_template;
 };
 
 static struct talitos_alg_template driver_algs[] = {
        /* AEAD algorithms.  These use a single-pass ipsec_esp descriptor */
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_AEAD,
+               .alg.crypto = {
                        .cra_name = "authenc(hmac(sha1),cbc(aes))",
                        .cra_driver_name = "authenc-hmac-sha1-cbc-aes-talitos",
                        .cra_blocksize = AES_BLOCK_SIZE,
@@ -1511,8 +1919,8 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_MODE1_MDEU_PAD |
                                     DESC_HDR_MODE1_MDEU_SHA1_HMAC,
        },
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_AEAD,
+               .alg.crypto = {
                        .cra_name = "authenc(hmac(sha1),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-sha1-cbc-3des-talitos",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
@@ -1538,8 +1946,8 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_MODE1_MDEU_PAD |
                                     DESC_HDR_MODE1_MDEU_SHA1_HMAC,
        },
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_AEAD,
+               .alg.crypto = {
                        .cra_name = "authenc(hmac(sha256),cbc(aes))",
                        .cra_driver_name = "authenc-hmac-sha256-cbc-aes-talitos",
                        .cra_blocksize = AES_BLOCK_SIZE,
@@ -1564,8 +1972,8 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_MODE1_MDEU_PAD |
                                     DESC_HDR_MODE1_MDEU_SHA256_HMAC,
        },
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_AEAD,
+               .alg.crypto = {
                        .cra_name = "authenc(hmac(sha256),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-sha256-cbc-3des-talitos",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
@@ -1591,8 +1999,8 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_MODE1_MDEU_PAD |
                                     DESC_HDR_MODE1_MDEU_SHA256_HMAC,
        },
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_AEAD,
+               .alg.crypto = {
                        .cra_name = "authenc(hmac(md5),cbc(aes))",
                        .cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos",
                        .cra_blocksize = AES_BLOCK_SIZE,
@@ -1617,8 +2025,8 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_MODE1_MDEU_PAD |
                                     DESC_HDR_MODE1_MDEU_MD5_HMAC,
        },
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_AEAD,
+               .alg.crypto = {
                        .cra_name = "authenc(hmac(md5),cbc(des3_ede))",
                        .cra_driver_name = "authenc-hmac-md5-cbc-3des-talitos",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
@@ -1645,8 +2053,8 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_MODE1_MDEU_MD5_HMAC,
        },
        /* ABLKCIPHER algorithms. */
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .alg.crypto = {
                        .cra_name = "cbc(aes)",
                        .cra_driver_name = "cbc-aes-talitos",
                        .cra_blocksize = AES_BLOCK_SIZE,
@@ -1667,8 +2075,8 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_SEL0_AESU |
                                     DESC_HDR_MODE0_AESU_CBC,
        },
-       {
-               .alg = {
+       {       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .alg.crypto = {
                        .cra_name = "cbc(des3_ede)",
                        .cra_driver_name = "cbc-3des-talitos",
                        .cra_blocksize = DES3_EDE_BLOCK_SIZE,
@@ -1689,14 +2097,140 @@ static struct talitos_alg_template driver_algs[] = {
                                     DESC_HDR_SEL0_DEU |
                                     DESC_HDR_MODE0_DEU_CBC |
                                     DESC_HDR_MODE0_DEU_3DES,
-       }
+       },
+       /* AHASH algorithms. */
+       {       .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.hash = {
+                       .init = ahash_init,
+                       .update = ahash_update,
+                       .final = ahash_final,
+                       .finup = ahash_finup,
+                       .digest = ahash_digest,
+                       .halg.digestsize = MD5_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "md5",
+                               .cra_driver_name = "md5-talitos",
+                               .cra_blocksize = MD5_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+                                            CRYPTO_ALG_ASYNC,
+                               .cra_type = &crypto_ahash_type
+                       }
+               },
+               .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+                                    DESC_HDR_SEL0_MDEUA |
+                                    DESC_HDR_MODE0_MDEU_MD5,
+       },
+       {       .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.hash = {
+                       .init = ahash_init,
+                       .update = ahash_update,
+                       .final = ahash_final,
+                       .finup = ahash_finup,
+                       .digest = ahash_digest,
+                       .halg.digestsize = SHA1_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha1",
+                               .cra_driver_name = "sha1-talitos",
+                               .cra_blocksize = SHA1_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+                                            CRYPTO_ALG_ASYNC,
+                               .cra_type = &crypto_ahash_type
+                       }
+               },
+               .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+                                    DESC_HDR_SEL0_MDEUA |
+                                    DESC_HDR_MODE0_MDEU_SHA1,
+       },
+       {       .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.hash = {
+                       .init = ahash_init,
+                       .update = ahash_update,
+                       .final = ahash_final,
+                       .finup = ahash_finup,
+                       .digest = ahash_digest,
+                       .halg.digestsize = SHA224_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha224",
+                               .cra_driver_name = "sha224-talitos",
+                               .cra_blocksize = SHA224_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+                                            CRYPTO_ALG_ASYNC,
+                               .cra_type = &crypto_ahash_type
+                       }
+               },
+               .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+                                    DESC_HDR_SEL0_MDEUA |
+                                    DESC_HDR_MODE0_MDEU_SHA224,
+       },
+       {       .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.hash = {
+                       .init = ahash_init,
+                       .update = ahash_update,
+                       .final = ahash_final,
+                       .finup = ahash_finup,
+                       .digest = ahash_digest,
+                       .halg.digestsize = SHA256_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha256",
+                               .cra_driver_name = "sha256-talitos",
+                               .cra_blocksize = SHA256_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+                                            CRYPTO_ALG_ASYNC,
+                               .cra_type = &crypto_ahash_type
+                       }
+               },
+               .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+                                    DESC_HDR_SEL0_MDEUA |
+                                    DESC_HDR_MODE0_MDEU_SHA256,
+       },
+       {       .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.hash = {
+                       .init = ahash_init,
+                       .update = ahash_update,
+                       .final = ahash_final,
+                       .finup = ahash_finup,
+                       .digest = ahash_digest,
+                       .halg.digestsize = SHA384_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha384",
+                               .cra_driver_name = "sha384-talitos",
+                               .cra_blocksize = SHA384_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+                                            CRYPTO_ALG_ASYNC,
+                               .cra_type = &crypto_ahash_type
+                       }
+               },
+               .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+                                    DESC_HDR_SEL0_MDEUB |
+                                    DESC_HDR_MODE0_MDEUB_SHA384,
+       },
+       {       .type = CRYPTO_ALG_TYPE_AHASH,
+               .alg.hash = {
+                       .init = ahash_init,
+                       .update = ahash_update,
+                       .final = ahash_final,
+                       .finup = ahash_finup,
+                       .digest = ahash_digest,
+                       .halg.digestsize = SHA512_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha512",
+                               .cra_driver_name = "sha512-talitos",
+                               .cra_blocksize = SHA512_BLOCK_SIZE,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+                                            CRYPTO_ALG_ASYNC,
+                               .cra_type = &crypto_ahash_type
+                       }
+               },
+               .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+                                    DESC_HDR_SEL0_MDEUB |
+                                    DESC_HDR_MODE0_MDEUB_SHA512,
+       },
 };
 
 struct talitos_crypto_alg {
        struct list_head entry;
        struct device *dev;
-       __be32 desc_hdr_template;
-       struct crypto_alg crypto_alg;
+       struct talitos_alg_template algt;
 };
 
 static int talitos_cra_init(struct crypto_tfm *tfm)
@@ -1705,13 +2239,28 @@ static int talitos_cra_init(struct crypto_tfm *tfm)
        struct talitos_crypto_alg *talitos_alg;
        struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       talitos_alg =  container_of(alg, struct talitos_crypto_alg, crypto_alg);
+       if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH)
+               talitos_alg = container_of(__crypto_ahash_alg(alg),
+                                          struct talitos_crypto_alg,
+                                          algt.alg.hash);
+       else
+               talitos_alg = container_of(alg, struct talitos_crypto_alg,
+                                          algt.alg.crypto);
 
        /* update context with ptr to dev */
        ctx->dev = talitos_alg->dev;
 
        /* copy descriptor header template value */
-       ctx->desc_hdr_template = talitos_alg->desc_hdr_template;
+       ctx->desc_hdr_template = talitos_alg->algt.desc_hdr_template;
+
+       return 0;
+}
+
+static int talitos_cra_init_aead(struct crypto_tfm *tfm)
+{
+       struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       talitos_cra_init(tfm);
 
        /* random first IV */
        get_random_bytes(ctx->iv, TALITOS_MAX_IV_LENGTH);
@@ -1719,6 +2268,19 @@ static int talitos_cra_init(struct crypto_tfm *tfm)
        return 0;
 }
 
+static int talitos_cra_init_ahash(struct crypto_tfm *tfm)
+{
+       struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       talitos_cra_init(tfm);
+
+       ctx->keylen = 0;
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct talitos_ahash_req_ctx));
+
+       return 0;
+}
+
 /*
  * given the alg's descriptor header template, determine whether descriptor
  * type and primary/secondary execution units required match the hw
@@ -1747,7 +2309,15 @@ static int talitos_remove(struct of_device *ofdev)
        int i;
 
        list_for_each_entry_safe(t_alg, n, &priv->alg_list, entry) {
-               crypto_unregister_alg(&t_alg->crypto_alg);
+               switch (t_alg->algt.type) {
+               case CRYPTO_ALG_TYPE_ABLKCIPHER:
+               case CRYPTO_ALG_TYPE_AEAD:
+                       crypto_unregister_alg(&t_alg->algt.alg.crypto);
+                       break;
+               case CRYPTO_ALG_TYPE_AHASH:
+                       crypto_unregister_ahash(&t_alg->algt.alg.hash);
+                       break;
+               }
                list_del(&t_alg->entry);
                kfree(t_alg);
        }
@@ -1781,6 +2351,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
                                                    struct talitos_alg_template
                                                           *template)
 {
+       struct talitos_private *priv = dev_get_drvdata(dev);
        struct talitos_crypto_alg *t_alg;
        struct crypto_alg *alg;
 
@@ -1788,16 +2359,36 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
        if (!t_alg)
                return ERR_PTR(-ENOMEM);
 
-       alg = &t_alg->crypto_alg;
-       *alg = template->alg;
+       t_alg->algt = *template;
+
+       switch (t_alg->algt.type) {
+       case CRYPTO_ALG_TYPE_ABLKCIPHER:
+               alg = &t_alg->algt.alg.crypto;
+               alg->cra_init = talitos_cra_init;
+               break;
+       case CRYPTO_ALG_TYPE_AEAD:
+               alg = &t_alg->algt.alg.crypto;
+               alg->cra_init = talitos_cra_init_aead;
+               break;
+       case CRYPTO_ALG_TYPE_AHASH:
+               alg = &t_alg->algt.alg.hash.halg.base;
+               alg->cra_init = talitos_cra_init_ahash;
+               if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) &&
+                   !strcmp(alg->cra_name, "sha224")) {
+                       t_alg->algt.alg.hash.init = ahash_init_sha224_swinit;
+                       t_alg->algt.desc_hdr_template =
+                                       DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+                                       DESC_HDR_SEL0_MDEUA |
+                                       DESC_HDR_MODE0_MDEU_SHA256;
+               }
+               break;
+       }
 
        alg->cra_module = THIS_MODULE;
-       alg->cra_init = talitos_cra_init;
        alg->cra_priority = TALITOS_CRA_PRIORITY;
        alg->cra_alignmask = 0;
        alg->cra_ctxsize = sizeof(struct talitos_ctx);
 
-       t_alg->desc_hdr_template = template->desc_hdr_template;
        t_alg->dev = dev;
 
        return t_alg;
@@ -1877,7 +2468,8 @@ static int talitos_probe(struct of_device *ofdev,
                priv->features |= TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT;
 
        if (of_device_is_compatible(np, "fsl,sec2.1"))
-               priv->features |= TALITOS_FTR_HW_AUTH_CHECK;
+               priv->features |= TALITOS_FTR_HW_AUTH_CHECK |
+                                 TALITOS_FTR_SHA224_HWINIT;
 
        priv->chan = kzalloc(sizeof(struct talitos_channel) *
                             priv->num_channels, GFP_KERNEL);
@@ -1931,6 +2523,7 @@ static int talitos_probe(struct of_device *ofdev,
        for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
                if (hw_supports(dev, driver_algs[i].desc_hdr_template)) {
                        struct talitos_crypto_alg *t_alg;
+                       char *name = NULL;
 
                        t_alg = talitos_alg_alloc(dev, &driver_algs[i]);
                        if (IS_ERR(t_alg)) {
@@ -1938,15 +2531,27 @@ static int talitos_probe(struct of_device *ofdev,
                                goto err_out;
                        }
 
-                       err = crypto_register_alg(&t_alg->crypto_alg);
+                       switch (t_alg->algt.type) {
+                       case CRYPTO_ALG_TYPE_ABLKCIPHER:
+                       case CRYPTO_ALG_TYPE_AEAD:
+                               err = crypto_register_alg(
+                                               &t_alg->algt.alg.crypto);
+                               name = t_alg->algt.alg.crypto.cra_driver_name;
+                               break;
+                       case CRYPTO_ALG_TYPE_AHASH:
+                               err = crypto_register_ahash(
+                                               &t_alg->algt.alg.hash);
+                               name =
+                                t_alg->algt.alg.hash.halg.base.cra_driver_name;
+                               break;
+                       }
                        if (err) {
                                dev_err(dev, "%s alg registration failed\n",
-                                       t_alg->crypto_alg.cra_driver_name);
+                                       name);
                                kfree(t_alg);
                        } else {
                                list_add_tail(&t_alg->entry, &priv->alg_list);
-                               dev_info(dev, "%s\n",
-                                        t_alg->crypto_alg.cra_driver_name);
+                               dev_info(dev, "%s\n", name);
                        }
                }
        }
index ff5a145..0b746ac 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Freescale SEC (talitos) device register and descriptor header defines
  *
- * Copyright (c) 2006-2008 Freescale Semiconductor, Inc.
+ * Copyright (c) 2006-2010 Freescale Semiconductor, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
 #define TALITOS_CRCUISR                        0xf030 /* cyclic redundancy check unit*/
 #define TALITOS_CRCUISR_LO             0xf034
 
+#define TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256      0x28
+#define TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512                0x48
+
 /*
  * talitos descriptor header (hdr) bits
  */
 #define        DESC_HDR_MODE0_AESU_CBC         cpu_to_be32(0x00200000)
 #define        DESC_HDR_MODE0_DEU_CBC          cpu_to_be32(0x00400000)
 #define        DESC_HDR_MODE0_DEU_3DES         cpu_to_be32(0x00200000)
+#define        DESC_HDR_MODE0_MDEU_CONT        cpu_to_be32(0x08000000)
 #define        DESC_HDR_MODE0_MDEU_INIT        cpu_to_be32(0x01000000)
 #define        DESC_HDR_MODE0_MDEU_HMAC        cpu_to_be32(0x00800000)
 #define        DESC_HDR_MODE0_MDEU_PAD         cpu_to_be32(0x00400000)
+#define        DESC_HDR_MODE0_MDEU_SHA224      cpu_to_be32(0x00300000)
 #define        DESC_HDR_MODE0_MDEU_MD5         cpu_to_be32(0x00200000)
 #define        DESC_HDR_MODE0_MDEU_SHA256      cpu_to_be32(0x00100000)
 #define        DESC_HDR_MODE0_MDEU_SHA1        cpu_to_be32(0x00000000)
+#define        DESC_HDR_MODE0_MDEUB_SHA384     cpu_to_be32(0x00000000)
+#define        DESC_HDR_MODE0_MDEUB_SHA512     cpu_to_be32(0x00200000)
 #define        DESC_HDR_MODE0_MDEU_MD5_HMAC    (DESC_HDR_MODE0_MDEU_MD5 | \
                                         DESC_HDR_MODE0_MDEU_HMAC)
 #define        DESC_HDR_MODE0_MDEU_SHA256_HMAC (DESC_HDR_MODE0_MDEU_SHA256 | \
 #define        DESC_HDR_MODE1_MDEU_INIT        cpu_to_be32(0x00001000)
 #define        DESC_HDR_MODE1_MDEU_HMAC        cpu_to_be32(0x00000800)
 #define        DESC_HDR_MODE1_MDEU_PAD         cpu_to_be32(0x00000400)
+#define        DESC_HDR_MODE1_MDEU_SHA224      cpu_to_be32(0x00000300)
 #define        DESC_HDR_MODE1_MDEU_MD5         cpu_to_be32(0x00000200)
 #define        DESC_HDR_MODE1_MDEU_SHA256      cpu_to_be32(0x00000100)
 #define        DESC_HDR_MODE1_MDEU_SHA1        cpu_to_be32(0x00000000)
+#define        DESC_HDR_MODE1_MDEUB_SHA384     cpu_to_be32(0x00000000)
+#define        DESC_HDR_MODE1_MDEUB_SHA512     cpu_to_be32(0x00000200)
 #define        DESC_HDR_MODE1_MDEU_MD5_HMAC    (DESC_HDR_MODE1_MDEU_MD5 | \
                                         DESC_HDR_MODE1_MDEU_HMAC)
 #define        DESC_HDR_MODE1_MDEU_SHA256_HMAC (DESC_HDR_MODE1_MDEU_SHA256 | \
index fc0d575..59c3e5b 100644 (file)
@@ -103,6 +103,23 @@ struct blkcipher_walk {
        unsigned int blocksize;
 };
 
+struct ablkcipher_walk {
+       struct {
+               struct page *page;
+               unsigned int offset;
+       } src, dst;
+
+       struct scatter_walk     in;
+       unsigned int            nbytes;
+       struct scatter_walk     out;
+       unsigned int            total;
+       struct list_head        buffers;
+       u8                      *iv_buffer;
+       u8                      *iv;
+       int                     flags;
+       unsigned int            blocksize;
+};
+
 extern const struct crypto_type crypto_ablkcipher_type;
 extern const struct crypto_type crypto_aead_type;
 extern const struct crypto_type crypto_blkcipher_type;
@@ -173,6 +190,12 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
                              struct blkcipher_walk *walk,
                              unsigned int blocksize);
 
+int ablkcipher_walk_done(struct ablkcipher_request *req,
+                        struct ablkcipher_walk *walk, int err);
+int ablkcipher_walk_phys(struct ablkcipher_request *req,
+                        struct ablkcipher_walk *walk);
+void __ablkcipher_walk_complete(struct ablkcipher_walk *walk);
+
 static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm)
 {
        return PTR_ALIGN(crypto_tfm_ctx(tfm),
@@ -283,6 +306,23 @@ static inline void blkcipher_walk_init(struct blkcipher_walk *walk,
        walk->total = nbytes;
 }
 
+static inline void ablkcipher_walk_init(struct ablkcipher_walk *walk,
+                                       struct scatterlist *dst,
+                                       struct scatterlist *src,
+                                       unsigned int nbytes)
+{
+       walk->in.sg = src;
+       walk->out.sg = dst;
+       walk->total = nbytes;
+       INIT_LIST_HEAD(&walk->buffers);
+}
+
+static inline void ablkcipher_walk_complete(struct ablkcipher_walk *walk)
+{
+       if (unlikely(!list_empty(&walk->buffers)))
+               __ablkcipher_walk_complete(walk);
+}
+
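A hedged sketch of how a driver is expected to drive this new walker, assuming ablkcipher_walk_done() follows the same contract as blkcipher_walk_done() (the per-chunk hardware programming is left as a comment; in a real asynchronous driver ablkcipher_walk_complete() would typically run from the completion path):

/* Hedged sketch; the walk_done contract is assumed to mirror blkcipher_walk_done(). */
static int my_walk_request(struct ablkcipher_request *req)
{
	struct ablkcipher_walk walk;
	int err;

	ablkcipher_walk_init(&walk, req->dst, req->src, req->nbytes);
	err = ablkcipher_walk_phys(req, &walk);

	while (!err && walk.nbytes) {
		/* this chunk: walk.src.page + walk.src.offset ->
		 * walk.dst.page + walk.dst.offset; program the hardware here */
		err = ablkcipher_walk_done(req, &walk, 0);
	}

	/* flush any bounce buffers queued on walk.buffers */
	ablkcipher_walk_complete(&walk);
	return err;
}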
 static inline struct crypto_async_request *crypto_get_backlog(
        struct crypto_queue *queue)
 {
index 51611da..8d84062 100644 (file)
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/timer.h>
 
+/**
+ * struct padata_priv - Embedded into the user's data structure.
+ *
+ * @list: List entry, to attach to the padata lists.
+ * @pd: Pointer to the internal control structure.
+ * @cb_cpu: Callback cpu for serialization.
+ * @seq_nr: Sequence number of the parallelized data object.
+ * @info: Used to pass information from the parallel to the serial function.
+ * @parallel: Parallel execution function.
+ * @serial: Serial complete function.
+ */
 struct padata_priv {
        struct list_head        list;
        struct parallel_data    *pd;
@@ -35,11 +47,29 @@ struct padata_priv {
        void                    (*serial)(struct padata_priv *padata);
 };
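
A hedged sketch of the embedding model this kernel-doc describes (all my_* names are hypothetical; cb_cpu must be a cpu in the instance's cpumask; needs <linux/padata.h>):

/* Hedged usage sketch; my_work, my_parallel, my_serial and my_submit are hypothetical. */
struct my_work {
	struct padata_priv padata;	/* embedded control structure */
	/* ... user payload ... */
};

static void my_parallel(struct padata_priv *padata)
{
	/* heavy work, running on one cpu of the instance's cpumask */
	padata_do_serial(padata);	/* queue for in-order completion */
}

static void my_serial(struct padata_priv *padata)
{
	/* container_of(padata, struct my_work, padata) completes here,
	 * strictly in submission order */
}

static int my_submit(struct padata_instance *pinst, struct my_work *w, int cb_cpu)
{
	w->padata.parallel = my_parallel;
	w->padata.serial   = my_serial;
	return padata_do_parallel(pinst, &w->padata, cb_cpu);
}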
 
+/**
+ * struct padata_list
+ *
+ * @list: List head.
+ * @lock: List lock.
+ */
 struct padata_list {
        struct list_head        list;
        spinlock_t              lock;
 };
 
+/**
+ * struct padata_queue - The percpu padata queues.
+ *
+ * @parallel: List to wait for parallelization.
+ * @reorder: List to wait for reordering after parallel processing.
+ * @serial: List to wait for serialization after reordering.
+ * @pwork: work struct for parallelization.
+ * @swork: work struct for serialization.
+ * @pd: Backpointer to the internal control structure.
+ * @num_obj: Number of objects that are processed by this cpu.
+ * @cpu_index: Index of the cpu.
+ */
 struct padata_queue {
        struct padata_list      parallel;
        struct padata_list      reorder;
@@ -51,6 +81,20 @@ struct padata_queue {
        int                     cpu_index;
 };
 
+/**
+ * struct parallel_data - Internal control structure, covers everything
+ * that depends on the cpumask in use.
+ *
+ * @pinst: padata instance.
+ * @queue: percpu padata queues.
+ * @seq_nr: The sequence number that will be attached to the next object.
+ * @reorder_objects: Number of objects waiting in the reorder queues.
+ * @refcnt: Number of objects holding a reference on this parallel_data.
+ * @max_seq_nr: Maximal used sequence number.
+ * @cpumask: cpumask in use.
+ * @lock: Reorder lock.
+ * @timer: Reorder timer.
+ */
 struct parallel_data {
        struct padata_instance  *pinst;
        struct padata_queue     *queue;
@@ -60,8 +104,19 @@ struct parallel_data {
        unsigned int            max_seq_nr;
        cpumask_var_t           cpumask;
        spinlock_t              lock;
+       struct timer_list       timer;
 };
 
+/**
+ * struct padata_instance - The overall control structure.
+ *
+ * @cpu_notifier: cpu hotplug notifier.
+ * @wq: The workqueue in use.
+ * @pd: The internal control structure.
+ * @cpumask: User supplied cpumask.
+ * @lock: padata instance lock.
+ * @flags: padata flags.
+ */
 struct padata_instance {
        struct notifier_block   cpu_notifier;
        struct workqueue_struct *wq;
index fd03513..b1c9857 100644 (file)
@@ -29,7 +29,7 @@
 #include <linux/rcupdate.h>
 
 #define MAX_SEQ_NR INT_MAX - NR_CPUS
-#define MAX_OBJ_NUM 10000 * NR_CPUS
+#define MAX_OBJ_NUM 1000
 
 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 {
@@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work)
        local_bh_enable();
 }
 
-/*
+/**
  * padata_do_parallel - padata parallelization function
  *
  * @pinst: padata instance
@@ -152,6 +152,23 @@ out:
 }
 EXPORT_SYMBOL(padata_do_parallel);
 
+/*
+ * padata_get_next - Get the next object that needs serialization.
+ *
+ * Return values are:
+ *
+ * A pointer to the control struct of the next object that needs
+ * serialization, if present in one of the percpu reorder queues.
+ *
+ * NULL, if all percpu reorder queues are empty.
+ *
+ * -EINPROGRESS, if the next object that needs serialization will
+ *  be parallel processed by another cpu and is not yet present in
+ *  the cpu's reorder queue.
+ *
+ * -ENODATA, if this cpu has to do the parallel processing for
+ *  the next object.
+ */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
        int cpu, num_cpus, empty, calc_seq_nr;
@@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 
                /*
                 * Calculate the seq_nr of the object that should be
-                * next in this queue.
+                * next in this reorder queue.
                 */
                overrun = 0;
                calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
@@ -231,7 +248,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
                goto out;
        }
 
-       if (next_nr % num_cpus == next_queue->cpu_index) {
+       queue = per_cpu_ptr(pd->queue, smp_processor_id());
+       if (queue->cpu_index == next_queue->cpu_index) {
                padata = ERR_PTR(-ENODATA);
                goto out;
        }
@@ -247,19 +265,40 @@ static void padata_reorder(struct parallel_data *pd)
        struct padata_queue *queue;
        struct padata_instance *pinst = pd->pinst;
 
-try_again:
+       /*
+        * We need to ensure that only one cpu can work on dequeueing of
+        * the reorder queue at a time. Calculating in which percpu reorder
+        * queue the next object will arrive takes some time. A spinlock
+        * would be highly contended. Also it is not clear in which order
+        * the objects arrive at the reorder queues. So a cpu could wait to
+        * get the lock just to notice that there is nothing to do at the
+        * moment. Therefore we use a trylock and let the holder of the lock
+        * care for all the objects enqueued during the hold time of the lock.
+        */
        if (!spin_trylock_bh(&pd->lock))
-               goto out;
+               return;
 
        while (1) {
                padata = padata_get_next(pd);
 
+               /*
+                * All reorder queues are empty, or the next object that needs
+                * serialization is parallel processed by another cpu and is
+                * still on its way to the cpu's reorder queue, nothing to
+                * do for now.
+                */
                if (!padata || PTR_ERR(padata) == -EINPROGRESS)
                        break;
 
+               /*
+                * This cpu has to do the parallel processing of the next
+                * object. It's waiting in the cpu's parallelization queue,
+                * so exit immediately.
+                */
                if (PTR_ERR(padata) == -ENODATA) {
+                       del_timer(&pd->timer);
                        spin_unlock_bh(&pd->lock);
-                       goto out;
+                       return;
                }
 
                queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
@@ -273,13 +312,27 @@ try_again:
 
        spin_unlock_bh(&pd->lock);
 
-       if (atomic_read(&pd->reorder_objects))
-               goto try_again;
+       /*
+        * The next object that needs serialization might have arrived at
+        * the reorder queues in the meantime; we will be called again
+        * from the timer function if no one else cares for it.
+        */
+       if (atomic_read(&pd->reorder_objects)
+                       && !(pinst->flags & PADATA_RESET))
+               mod_timer(&pd->timer, jiffies + HZ);
+       else
+               del_timer(&pd->timer);
 
-out:
        return;
 }
 
+static void padata_reorder_timer(unsigned long arg)
+{
+       struct parallel_data *pd = (struct parallel_data *)arg;
+
+       padata_reorder(pd);
+}
+
 static void padata_serial_worker(struct work_struct *work)
 {
        struct padata_queue *queue;
@@ -308,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work)
        local_bh_enable();
 }
 
-/*
+/**
  * padata_do_serial - padata serialization function
  *
  * @padata: object to be serialized.
@@ -338,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata)
 }
 EXPORT_SYMBOL(padata_do_serial);
 
+/* Allocate and initialize the internal cpumask-dependent resources. */
 static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
                                             const struct cpumask *cpumask)
 {
@@ -358,17 +412,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
        if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
                goto err_free_queue;
 
-       for_each_possible_cpu(cpu) {
+       cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+
+       for_each_cpu(cpu, pd->cpumask) {
                queue = per_cpu_ptr(pd->queue, cpu);
 
                queue->pd = pd;
 
-               if (cpumask_test_cpu(cpu, cpumask)
-                   && cpumask_test_cpu(cpu, cpu_active_mask)) {
-                       queue->cpu_index = cpu_index;
-                       cpu_index++;
-               } else
-                       queue->cpu_index = -1;
+               queue->cpu_index = cpu_index;
+               cpu_index++;
 
                INIT_LIST_HEAD(&queue->reorder.list);
                INIT_LIST_HEAD(&queue->parallel.list);
@@ -382,11 +434,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
                atomic_set(&queue->num_obj, 0);
        }
 
-       cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
-
        num_cpus = cpumask_weight(pd->cpumask);
        pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
 
+       setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
        atomic_set(&pd->seq_nr, -1);
        atomic_set(&pd->reorder_objects, 0);
        atomic_set(&pd->refcnt, 0);
@@ -410,6 +461,31 @@ static void padata_free_pd(struct parallel_data *pd)
        kfree(pd);
 }
 
+/* Flush all objects out of the padata queues. */
+static void padata_flush_queues(struct parallel_data *pd)
+{
+       int cpu;
+       struct padata_queue *queue;
+
+       for_each_cpu(cpu, pd->cpumask) {
+               queue = per_cpu_ptr(pd->queue, cpu);
+               flush_work(&queue->pwork);
+       }
+
+       del_timer_sync(&pd->timer);
+
+       if (atomic_read(&pd->reorder_objects))
+               padata_reorder(pd);
+
+       for_each_cpu(cpu, pd->cpumask) {
+               queue = per_cpu_ptr(pd->queue, cpu);
+               flush_work(&queue->swork);
+       }
+
+       BUG_ON(atomic_read(&pd->refcnt) != 0);
+}
+
+/* Replace the internal control structure with a new one. */
 static void padata_replace(struct padata_instance *pinst,
                           struct parallel_data *pd_new)
 {
@@ -421,17 +497,13 @@ static void padata_replace(struct padata_instance *pinst,
 
        synchronize_rcu();
 
-       while (atomic_read(&pd_old->refcnt) != 0)
-               yield();
-
-       flush_workqueue(pinst->wq);
-
+       padata_flush_queues(pd_old);
        padata_free_pd(pd_old);
 
        pinst->flags &= ~PADATA_RESET;
 }
 
-/*
+/**
  * padata_set_cpumask - set the cpumask that padata should use
  *
  * @pinst: padata instance
@@ -443,10 +515,10 @@ int padata_set_cpumask(struct padata_instance *pinst,
        struct parallel_data *pd;
        int err = 0;
 
-       might_sleep();
-
        mutex_lock(&pinst->lock);
 
+       get_online_cpus();
+
        pd = padata_alloc_pd(pinst, cpumask);
        if (!pd) {
                err = -ENOMEM;
@@ -458,6 +530,8 @@ int padata_set_cpumask(struct padata_instance *pinst,
        padata_replace(pinst, pd);
 
 out:
+       put_online_cpus();
+
        mutex_unlock(&pinst->lock);
 
        return err;
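
   [Editor's note] With cpu hotplug now excluded inside padata_set_cpumask() via
   get_online_cpus()/put_online_cpus(), a caller only has to build the mask and call
   the function from sleepable context. A hypothetical caller is sketched below;
   my_restrict_to_cpu() and its use of a single-cpu mask are illustrative, assuming
   "pinst" came from an earlier padata_alloc().

	#include <linux/padata.h>
	#include <linux/cpumask.h>
	#include <linux/gfp.h>

	static int my_restrict_to_cpu(struct padata_instance *pinst, int cpu)
	{
		cpumask_var_t mask;
		int err;

		if (!alloc_cpumask_var(&mask, GFP_KERNEL))
			return -ENOMEM;

		cpumask_clear(mask);
		cpumask_set_cpu(cpu, mask);

		/* may sleep; hotplug is excluded internally by padata */
		err = padata_set_cpumask(pinst, mask);

		free_cpumask_var(mask);
		return err;
	}
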
@@ -479,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
        return 0;
 }
 
-/*
+/**
  * padata_add_cpu - add a cpu to the padata cpumask
  *
  * @pinst: padata instance
@@ -489,12 +563,12 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu)
 {
        int err;
 
-       might_sleep();
-
        mutex_lock(&pinst->lock);
 
+       get_online_cpus();
        cpumask_set_cpu(cpu, pinst->cpumask);
        err = __padata_add_cpu(pinst, cpu);
+       put_online_cpus();
 
        mutex_unlock(&pinst->lock);
 
@@ -517,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
        return 0;
 }
 
-/*
+/**
  * padata_remove_cpu - remove a cpu from the padata cpumask
  *
  * @pinst: padata instance
@@ -527,12 +601,12 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
 {
        int err;
 
-       might_sleep();
-
        mutex_lock(&pinst->lock);
 
+       get_online_cpus();
        cpumask_clear_cpu(cpu, pinst->cpumask);
        err = __padata_remove_cpu(pinst, cpu);
+       put_online_cpus();
 
        mutex_unlock(&pinst->lock);
 
@@ -540,38 +614,35 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
 }
 EXPORT_SYMBOL(padata_remove_cpu);
 
-/*
+/**
  * padata_start - start the parallel processing
  *
  * @pinst: padata instance to start
  */
 void padata_start(struct padata_instance *pinst)
 {
-       might_sleep();
-
        mutex_lock(&pinst->lock);
        pinst->flags |= PADATA_INIT;
        mutex_unlock(&pinst->lock);
 }
 EXPORT_SYMBOL(padata_start);
 
-/*
+/**
  * padata_stop - stop the parallel processing
  *
  * @pinst: padata instance to stop
  */
 void padata_stop(struct padata_instance *pinst)
 {
-       might_sleep();
-
        mutex_lock(&pinst->lock);
        pinst->flags &= ~PADATA_INIT;
        mutex_unlock(&pinst->lock);
 }
 EXPORT_SYMBOL(padata_stop);
 
-static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
-                                        unsigned long action, void *hcpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static int padata_cpu_callback(struct notifier_block *nfb,
+                              unsigned long action, void *hcpu)
 {
        int err;
        struct padata_instance *pinst;
@@ -621,8 +692,9 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
 
        return NOTIFY_OK;
 }
+#endif
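
   [Editor's note] The callback is now built only under CONFIG_HOTPLUG_CPU. The
   generic notifier pattern it follows is sketched below; my_cpu_callback, my_cpu_nb
   and my_hotplug_register() are invented names and the case bodies are placeholders.

	#include <linux/cpu.h>
	#include <linux/notifier.h>

	#ifdef CONFIG_HOTPLUG_CPU
	static int my_cpu_callback(struct notifier_block *nfb,
				   unsigned long action, void *hcpu)
	{
		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			/* hcpu carries the cpu number; bring it into use */
			break;
		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			/* drain work still targeting the departing cpu */
			break;
		}

		return NOTIFY_OK;
	}

	static struct notifier_block my_cpu_nb = {
		.notifier_call	= my_cpu_callback,
		.priority	= 0,
	};
	#endif

	static void my_hotplug_register(void)
	{
	#ifdef CONFIG_HOTPLUG_CPU
		register_hotcpu_notifier(&my_cpu_nb);
	#endif
	}
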
 
-/*
+/**
  * padata_alloc - allocate and initialize a padata instance
  *
  * @cpumask: cpumask that padata uses for parallelization
@@ -631,7 +703,6 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
 struct padata_instance *padata_alloc(const struct cpumask *cpumask,
                                     struct workqueue_struct *wq)
 {
-       int err;
        struct padata_instance *pinst;
        struct parallel_data *pd;
 
@@ -639,6 +710,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
        if (!pinst)
                goto err;
 
+       get_online_cpus();
+
        pd = padata_alloc_pd(pinst, cpumask);
        if (!pd)
                goto err_free_inst;
@@ -654,31 +727,32 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
 
        pinst->flags = 0;
 
+#ifdef CONFIG_HOTPLUG_CPU
        pinst->cpu_notifier.notifier_call = padata_cpu_callback;
        pinst->cpu_notifier.priority = 0;
-       err = register_hotcpu_notifier(&pinst->cpu_notifier);
-       if (err)
-               goto err_free_cpumask;
+       register_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+
+       put_online_cpus();
 
        mutex_init(&pinst->lock);
 
        return pinst;
 
-err_free_cpumask:
-       free_cpumask_var(pinst->cpumask);
 err_free_pd:
        padata_free_pd(pd);
 err_free_inst:
        kfree(pinst);
+       put_online_cpus();
 err:
        return NULL;
 }
 EXPORT_SYMBOL(padata_alloc);
 
-/*
+/**
  * padata_free - free a padata instance
  *
- * @ padata_inst: padata instance to free
+ * @padata_inst: padata instance to free
  */
 void padata_free(struct padata_instance *pinst)
 {
@@ -686,10 +760,13 @@ void padata_free(struct padata_instance *pinst)
 
        synchronize_rcu();
 
-       while (atomic_read(&pinst->pd->refcnt) != 0)
-               yield();
-
+#ifdef CONFIG_HOTPLUG_CPU
        unregister_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+       get_online_cpus();
+       padata_flush_queues(pinst->pd);
+       put_online_cpus();
+
        padata_free_pd(pinst->pd);
        free_cpumask_var(pinst->cpumask);
        kfree(pinst);
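
   [Editor's note] End-to-end, a padata user after this series allocates an instance,
   starts it, submits objects with parallel/serial callbacks and finally stops and
   frees it (padata_free() now flushes the queues instead of spinning on the
   refcount). The sketch below is modeled loosely on how pcrypt drives the API; the
   my_* names and the payload handling are illustrative, and error/return-code
   conventions of padata_do_parallel() are left to the real caller.

	#include <linux/kernel.h>
	#include <linux/padata.h>
	#include <linux/workqueue.h>
	#include <linux/cpumask.h>

	struct my_request {
		struct padata_priv padata;	/* embedded, recovered via container_of */
		/* ... request payload ... */
	};

	static struct padata_instance *my_pinst;
	static struct workqueue_struct *my_wq;

	static void my_parallel(struct padata_priv *padata)
	{
		/* cpu-intensive part, runs on a cpu from the instance's cpumask */
		padata_do_serial(padata);	/* hand back for in-order completion */
	}

	static void my_serial(struct padata_priv *padata)
	{
		struct my_request *req =
			container_of(padata, struct my_request, padata);

		/* completions arrive here in submission order */
		(void)req;
	}

	static int my_init(void)
	{
		my_wq = create_workqueue("my_padata");
		if (!my_wq)
			return -ENOMEM;

		/* allocation now happens under get_online_cpus() inside padata */
		my_pinst = padata_alloc(cpu_possible_mask, my_wq);
		if (!my_pinst) {
			destroy_workqueue(my_wq);
			return -ENOMEM;
		}

		padata_start(my_pinst);
		return 0;
	}

	static int my_submit(struct my_request *req, int cb_cpu)
	{
		req->padata.parallel = my_parallel;
		req->padata.serial = my_serial;

		return padata_do_parallel(my_pinst, &req->padata, cb_cpu);
	}

	static void my_exit(void)
	{
		padata_stop(my_pinst);
		padata_free(my_pinst);	/* flushes parallel and serial queues */
		destroy_workqueue(my_wq);
	}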