Linux Kernel 3.7.1
cast5_avx_glue.c
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src, bool xor);
asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);

static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src)
{
	__cast5_enc_blk_16way(ctx, dst, src, false);
}

static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
					  const u8 *src)
{
	__cast5_enc_blk_16way(ctx, dst, src, true);
}

static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src)
{
	cast5_dec_blk_16way(ctx, dst, src);
}


/* Defer kernel_fpu_begin() until at least one full 16-block batch is queued. */
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	int err;

	err = blkcipher_walk_virt(desc, walk);
	/* The walk may not sleep: the FPU is kept enabled across iterations. */
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			do {
				if (enc)
					cast5_enc_blk_xway(ctx, wdst, wsrc);
				else
					cast5_dec_blk_xway(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		/* Handle leftovers */
		do {
			if (enc)
				__cast5_encrypt(ctx, wdst, wsrc);
			else
				__cast5_decrypt(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

/*
 * CBC encryption is inherently serial: each block is chained to the previous
 * ciphertext block, so there is no 16-way AVX path on this side.
 */
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

/*
 * CBC decryption can be parallelized: process the data from the last block
 * backwards, decrypting up to 16 blocks at once and then XORing each result
 * with the preceding ciphertext block.
 */
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
	u64 last_iv;
	int i;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
				ivs[i] = src[i];

			cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
				*(dst + (i + 1)) ^= *(ivs + i);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

/* Encrypt the final, partial block by XORing it with one block of keystream. */
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
	__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
	int i;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			/* create ctrblks for parallel encrypt */
			for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
				if (dst != src)
					dst[i] = src[i];

				ctrblocks[i] = cpu_to_be64(ctrblk++);
			}

			cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
					       (u8 *)ctrblocks);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		if (dst != src)
			*dst = *src;

		ctrblocks[0] = cpu_to_be64(ctrblk++);

		__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
		*dst ^= ctrblocks[0];

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}


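/*
 * Six algorithm registrations follow: the "__"-prefixed blkciphers are
 * internal synchronous helpers (priority 0, not intended to be selected
 * directly), while the ecb/cbc/ctr(cast5) ablkciphers at priority 200 wrap
 * them through the ablk_helper/cryptd machinery so the AVX path can also be
 * reached from contexts where the FPU cannot be used directly.
 */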
static struct crypto_alg cast5_algs[6] = { {
	.cra_name = "__ecb-cast5-avx",
	.cra_driver_name = "__driver-ecb-cast5-avx",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct cast5_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.setkey = cast5_setkey,
			.encrypt = ecb_encrypt,
			.decrypt = ecb_decrypt,
		},
	},
}, {
	.cra_name = "__cbc-cast5-avx",
	.cra_driver_name = "__driver-cbc-cast5-avx",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct cast5_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.setkey = cast5_setkey,
			.encrypt = cbc_encrypt,
			.decrypt = cbc_decrypt,
		},
	},
}, {
	.cra_name = "__ctr-cast5-avx",
	.cra_driver_name = "__driver-ctr-cast5-avx",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = 1,
	.cra_ctxsize = sizeof(struct cast5_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.ivsize = CAST5_BLOCK_SIZE,
			.setkey = cast5_setkey,
			.encrypt = ctr_crypt,
			.decrypt = ctr_crypt,
		},
	},
}, {
	.cra_name = "ecb(cast5)",
	.cra_driver_name = "ecb-cast5-avx",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
}, {
	.cra_name = "cbc(cast5)",
	.cra_driver_name = "cbc-cast5-avx",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.ivsize = CAST5_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = __ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
}, {
	.cra_name = "ctr(cast5)",
	.cra_driver_name = "ctr-cast5-avx",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = 1,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.ivsize = CAST5_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_encrypt,
			.geniv = "chainiv",
		},
	},
} };

static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	/* Check that the OS has enabled XSAVE for both SSE and YMM state. */
	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");
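
For reference, a minimal, hypothetical sketch of how kernel code might drive the async "ctr(cast5)" algorithm registered above through the 3.7-era ablkcipher API. The names demo_cb and demo_ctr_cast5 are invented for this example (they are not part of the file), the buffer must be kmalloc'd memory rather than stack memory, and error handling is abbreviated.

/*
 * Hypothetical usage sketch (not part of the original file): encrypt one
 * buffer in place with "ctr(cast5)".
 */
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/completion.h>

static void demo_cb(struct crypto_async_request *req, int err)
{
	/* -EINPROGRESS only signals that a backlogged request has started. */
	if (err != -EINPROGRESS)
		complete(req->data);
}

static int demo_ctr_cast5(u8 *buf, unsigned int len,
			  const u8 *key, unsigned int keylen, u8 *iv)
{
	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	struct scatterlist sg;
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;

	tfm = crypto_alloc_ablkcipher("ctr(cast5)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_ablkcipher_setkey(tfm, key, keylen);
	if (ret)
		goto out_free_tfm;

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					demo_cb, &done);
	sg_init_one(&sg, buf, len);	/* buf must not be on the stack */
	ablkcipher_request_set_crypt(req, &sg, &sg, len, iv);

	ret = crypto_ablkcipher_encrypt(req);
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		/* Request was queued; wait for the completion callback. */
		wait_for_completion(&done);
		ret = 0;
	}

	ablkcipher_request_free(req);
out_free_tfm:
	crypto_free_ablkcipher(tfm);
	return ret;
}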