OpenSSL  1.0.1c
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros
e_padlock.c
Go to the documentation of this file.
1 /*
2  * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3  * Written by Michal Ludvig <[email protected]>
4  * http://www.logix.cz/michal
5  *
6  * Big thanks to Andy Polyakov for a help with optimization,
7  * assembler fixes, port to MS Windows and a lot of other
8  * valuable work on this engine!
9  */
10 
11 /* ====================================================================
12  * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  *
18  * 1. Redistributions of source code must retain the above copyright
19  * notice, this list of conditions and the following disclaimer.
20  *
21  * 2. Redistributions in binary form must reproduce the above copyright
22  * notice, this list of conditions and the following disclaimer in
23  * the documentation and/or other materials provided with the
24  * distribution.
25  *
26  * 3. All advertising materials mentioning features or use of this
27  * software must display the following acknowledgment:
28  * "This product includes software developed by the OpenSSL Project
29  * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
30  *
31  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32  * endorse or promote products derived from this software without
33  * prior written permission. For written permission, please contact
35  *
36  * 5. Products derived from this software may not be called "OpenSSL"
37  * nor may "OpenSSL" appear in their names without prior written
38  * permission of the OpenSSL Project.
39  *
40  * 6. Redistributions of any form whatsoever must retain the following
41  * acknowledgment:
42  * "This product includes software developed by the OpenSSL Project
43  * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56  * OF THE POSSIBILITY OF SUCH DAMAGE.
57  * ====================================================================
58  *
59  * This product includes cryptographic software written by Eric Young
60  * ([email protected]). This product includes software written by Tim
61  * Hudson ([email protected]).
62  *
63  */
64 
65 
66 #include <stdio.h>
67 #include <string.h>
68 
69 #include <openssl/opensslconf.h>
70 #include <openssl/crypto.h>
71 #include <openssl/dso.h>
72 #include <openssl/engine.h>
73 #include <openssl/evp.h>
74 #ifndef OPENSSL_NO_AES
75 #include <openssl/aes.h>
76 #endif
77 #include <openssl/rand.h>
78 #include <openssl/err.h>
79 
80 #ifndef OPENSSL_NO_HW
81 #ifndef OPENSSL_NO_HW_PADLOCK
82 
83 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
84 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
85 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
86 # define DYNAMIC_ENGINE
87 # endif
88 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
89 # ifdef ENGINE_DYNAMIC_SUPPORT
90 # define DYNAMIC_ENGINE
91 # endif
92 #else
93 # error "Only OpenSSL >= 0.9.7 is supported"
94 #endif
95 
/* VIA PadLock AES is available *ONLY* on some x86 CPUs.
   Not only does it not exist elsewhere, the code
   cannot even be compiled on other platforms!

   In addition, because of the heavy use of inline assembler,
   compiler choice is limited to GCC and Microsoft C. */
102 #undef COMPILE_HW_PADLOCK
103 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105  (defined(_MSC_VER) && defined(_M_IX86))
106 # define COMPILE_HW_PADLOCK
107 # endif
108 #endif
109 
110 #ifdef OPENSSL_NO_DYNAMIC_ENGINE
111 #ifdef COMPILE_HW_PADLOCK
112 static ENGINE *ENGINE_padlock (void);
113 #endif
114 
/*
 * Build the PadLock ENGINE and add it to OpenSSL's engine list.
 * When PadLock support is not compiled in (COMPILE_HW_PADLOCK is
 * undefined, i.e. non-x86 targets) the function body is empty.
 */
void ENGINE_load_padlock (void)
{
#ifdef COMPILE_HW_PADLOCK
	ENGINE *engine = ENGINE_padlock ();

	if (engine != NULL) {
		/* Add to the list, release the local handle and
		   discard whatever errors were queued on the way. */
		ENGINE_add (engine);
		ENGINE_free (engine);
		ERR_clear_error ();
	}
#endif
}
126 
127 #endif
128 
129 #ifdef COMPILE_HW_PADLOCK
130 /* We do these includes here to avoid header problems on platforms that
131  do not have the VIA padlock anyway... */
132 #include <stdlib.h>
133 #ifdef _WIN32
134 # include <malloc.h>
135 # ifndef alloca
136 # define alloca _alloca
137 # endif
138 #elif defined(__GNUC__)
139 # ifndef alloca
140 # define alloca(s) __builtin_alloca(s)
141 # endif
142 #endif
143 
/* Functions for ENGINE detection and control.
   padlock_available() probes the CPU and fills in the feature flags
   below; padlock_init() is the hook installed as the ENGINE init
   function. */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff: method table handed to ENGINE_set_RAND() when the
   RNG feature flag is set */
static RAND_METHOD padlock_rand;

/* Cipher Stuff: callback handed to ENGINE_set_ciphers() */
#ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
#endif

/* Engine names; padlock_name is formatted at bind time from the
   detected feature flags */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features, written by padlock_available() */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
#ifndef OPENSSL_NO_AES
/* Non-zero while the hardware is assumed to need 16-byte aligned
   buffers; nothing in this file ever clears it. */
static int padlock_aes_align_required = 1;
#endif
166 
167 /* ===== Engine "management" functions ===== */
168 
169 /* Prepare the ENGINE structure for registration */
170 static int
171 padlock_bind_helper(ENGINE *e)
172 {
173  /* Check available features */
174  padlock_available();
175 
176 #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */
177  padlock_use_rng=0;
178 #endif
179 
180  /* Generate a nice engine name with available features */
181  BIO_snprintf(padlock_name, sizeof(padlock_name),
182  "VIA PadLock (%s, %s)",
183  padlock_use_rng ? "RNG" : "no-RNG",
184  padlock_use_ace ? "ACE" : "no-ACE");
185 
186  /* Register everything or return with an error */
187  if (!ENGINE_set_id(e, padlock_id) ||
188  !ENGINE_set_name(e, padlock_name) ||
189 
190  !ENGINE_set_init_function(e, padlock_init) ||
191 #ifndef OPENSSL_NO_AES
192  (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
193 #endif
194  (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
195  return 0;
196  }
197 
198  /* Everything looks good */
199  return 1;
200 }
201 
202 #ifdef OPENSSL_NO_DYNAMIC_ENGINE
203 
204 /* Constructor */
205 static ENGINE *
206 ENGINE_padlock(void)
207 {
208  ENGINE *eng = ENGINE_new();
209 
210  if (!eng) {
211  return NULL;
212  }
213 
214  if (!padlock_bind_helper(eng)) {
215  ENGINE_free(eng);
216  return NULL;
217  }
218 
219  return eng;
220 }
221 
222 #endif
223 
224 /* Check availability of the engine */
225 static int
226 padlock_init(ENGINE *e)
227 {
228  return (padlock_use_rng || padlock_use_ace);
229 }
230 
231 /* This stuff is needed if this ENGINE is being compiled into a self-contained
232  * shared-library.
233  */
234 #ifdef DYNAMIC_ENGINE
235 static int
236 padlock_bind_fn(ENGINE *e, const char *id)
237 {
238  if (id && (strcmp(id, padlock_id) != 0)) {
239  return 0;
240  }
241 
242  if (!padlock_bind_helper(e)) {
243  return 0;
244  }
245 
246  return 1;
247 }
248 
250 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
251 #endif /* DYNAMIC_ENGINE */
252 
253 /* ===== Here comes the "real" engine ===== */
254 
255 #ifndef OPENSSL_NO_AES
256 /* Some AES-related constants */
257 #define AES_BLOCK_SIZE 16
258 #define AES_KEY_SIZE_128 16
259 #define AES_KEY_SIZE_192 24
260 #define AES_KEY_SIZE_256 32
261 
262 /* Here we store the status information relevant to the
263  current context. */
264 /* BIG FAT WARNING:
265  * Inline assembler in PADLOCK_XCRYPT_ASM()
266  * depends on the order of items in this structure.
267  * Don't blindly modify, reorder, etc!
268  */
269 struct padlock_cipher_data
270 {
271  unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
272  union { unsigned int pad[4];
273  struct {
274  int rounds:4;
275  int dgst:1; /* n/a in C3 */
276  int align:1; /* n/a in C3 */
277  int ciphr:1; /* n/a in C3 */
278  unsigned int keygen:1;
279  int interm:1;
280  unsigned int encdec:1;
281  int ksize:2;
282  } b;
283  } cword; /* Control word */
284  AES_KEY ks; /* Encryption key */
285 };
286 
/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
293 static volatile struct padlock_cipher_data *padlock_saved_context;
294 #endif
295 
296 /*
297  * =======================================================
298  * Inline assembler section(s).
299  * =======================================================
300  * Order of arguments is chosen to facilitate Windows port
301  * using __fastcall calling convention. If you wish to add
302  * more routines, keep in mind that first __fastcall
303  * argument is passed in %ecx and second - in %edx.
304  * =======================================================
305  */
306 #if defined(__GNUC__) && __GNUC__>=2
307 /*
308  * As for excessive "push %ebx"/"pop %ebx" found all over.
309  * When generating position-independent code GCC won't let
310  * us use "b" in assembler templates nor even respect "ebx"
311  * in "clobber description." Therefore the trouble...
312  */
313 
/* Helper function - check if a CPUID instruction
   is available on this CPU.
   Returns 1 if bit #21 of EFLAGS can be toggled, 0 otherwise. */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available. */
	asm volatile (
		/* read EFLAGS and flip bit 21 */
		"pushf\n"
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"
		/* ecx = the bit value we are trying to write */
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"
		/* write the modified flags back ... */
		"pushl %%eax\n"
		"popf\n"
		/* ... and read them again */
		"pushf\n"
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"
		/* zero if the bit kept the value we wrote */
		"xorl %%eax, %%ecx\n"
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	return (result == 0);
}
340 
341 /* Load supported features of the CPU to see if
342  the PadLock is available. */
343 static int
344 padlock_available(void)
345 {
346  char vendor_string[16];
347  unsigned int eax, edx;
348 
349  /* First check if the CPUID instruction is available at all... */
350  if (! padlock_insn_cpuid_available())
351  return 0;
352 
353  /* Are we running on the Centaur (VIA) CPU? */
354  eax = 0x00000000;
355  vendor_string[12] = 0;
356  asm volatile (
357  "pushl %%ebx\n"
358  "cpuid\n"
359  "movl %%ebx,(%%edi)\n"
360  "movl %%edx,4(%%edi)\n"
361  "movl %%ecx,8(%%edi)\n"
362  "popl %%ebx"
363  : "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
364  if (strcmp(vendor_string, "CentaurHauls") != 0)
365  return 0;
366 
367  /* Check for Centaur Extended Feature Flags presence */
368  eax = 0xC0000000;
369  asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
370  : "+a"(eax) : : "ecx", "edx");
371  if (eax < 0xC0000001)
372  return 0;
373 
374  /* Read the Centaur Extended Feature Flags */
375  eax = 0xC0000001;
376  asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
377  : "+a"(eax), "=d"(edx) : : "ecx");
378 
379  /* Fill up some flags */
380  padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
381  padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
382 
383  return padlock_use_ace + padlock_use_rng;
384 }
385 
386 #ifndef OPENSSL_NO_AES
387 /* Our own htonl()/ntohl() */
388 static inline void
389 padlock_bswapl(AES_KEY *ks)
390 {
391  size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
392  unsigned int *key = ks->rd_key;
393 
394  while (i--) {
395  asm volatile ("bswapl %0" : "+r"(*key));
396  key++;
397  }
398 }
399 #endif
400 
/* Force key reload from memory to the CPU microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick.
   The flags are pushed and immediately popped, so the stack
   is left balanced. */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}
409 
410 #ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 *
 * Step by step:
 *   - push EFLAGS and test bit 30 of the pushed copy;
 *   - if the bit is clear, or the saved context equals cdata,
 *     jump to 1: and simply drop the pushed flags;
 *   - otherwise reload EFLAGS with popfl (see padlock_reload_key),
 *     then pre-compensate %esp for the addl at 1:;
 *   - finally record cdata as the new padlock_saved_context.
 */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"
" btl $30,(%%esp)\n"
" jnc 1f\n"
" cmpl %2,%1\n"
" je 1f\n"
" popfl\n"
" subl $4,%%esp\n"
"1: addl $4,%%esp\n"
" movl %2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}
436 
/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
/* Register set-up performed by the template:
 *   %eax = cdata (the iv sits at offset 0)
 *   %edx = cdata+16 (control word)
 *   %ebx = cdata+32 (key), saved and restored around the instruction
 *   %ecx = cnt, %edi = out, %esi = inp
 * 'cnt' is passed by the callers as a number of AES blocks.
 * The generated function returns whatever the instruction leaves
 * in %eax; callers treat it as a pointer to the resulting iv.
 */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
static inline void *name(size_t cnt, \
	struct padlock_cipher_data *cdata, \
	void *out, const void *inp) \
{ void *iv; \
	asm volatile ( "pushl %%ebx\n" \
	" leal 16(%0),%%edx\n" \
	" leal 32(%0),%%ebx\n" \
	rep_xcrypt "\n" \
	" popl %%ebx" \
	: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
	: "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
	: "edx", "cc", "memory"); \
	return iv; \
}

/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
464 #endif
465 
/* The RNG call itself.
   addr    - destination buffer, passed to the instruction in %edi.
   edx_in  - value passed to the instruction in %edx.
   Returns the value the instruction leaves in %eax.
   NOTE(review): the meaning of the %edx input and of the %eax result
   is defined by the xstore instruction, not by this file - confirm
   against the VIA PadLock programming guide. */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
		: "=a"(eax_out),"=m"(*(unsigned *)addr)
		: "D"(addr), "d" (edx_in)
		);

	return eax_out;
}
479 
/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 *
 * Copies n bytes from src to dst one 'long' at a time, rounding n
 * down to a whole number of words, and returns dst. Nothing is
 * copied when n is smaller than sizeof(long); a do/while loop here
 * would copy one word and then wrap the counter around.
 */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long *d=dst;
	const long *s=src;

	for (n /= sizeof(*d); n != 0; n--)
		*d++ = *s++;

	return dst;
}
502 
503 #elif defined(_MSC_VER)
504 /*
505  * Unlike GCC these are real functions. In order to minimize impact
506  * on performance we adhere to __fastcall calling convention in
507  * order to get two first arguments passed through %ecx and %edx.
508  * Which kind of suits very well, as instructions in question use
509  * both %ecx and %edx as input:-)
510  */
/* Emit the raw opcode bytes of a 'rep xcrypt*' instruction;
   'code' is the final byte that selects the mode. */
#define REP_XCRYPT(code) \
	_asm _emit 0xf3 \
	_asm _emit 0x0f _asm _emit 0xa7 \
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 */
/* With __fastcall 'cnt' arrives in ecx and 'cdata' in edx. The body
 * moves cdata to eax, points edx at cdata+16 and ebx at cdata+32 and
 * loads edi/esi from the remaining two arguments. There is no return
 * statement: the result is whatever the instruction leaves in eax.
 */
#define PADLOCK_XCRYPT_ASM(name,code) \
static void * __fastcall \
	name (size_t cnt, void *cdata, \
	void *outp, const void *inp) \
{ _asm mov eax,edx \
	_asm lea edx,[eax+16] \
	_asm lea ebx,[eax+32] \
	_asm mov edi,outp \
	_asm mov esi,inp \
	REP_XCRYPT(code) \
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
537 
/* The RNG call itself (xstore, emitted as raw opcode bytes).
   With __fastcall 'outp' arrives in ecx and 'code' in edx; the
   destination is moved to edi and edx is left as received. There is
   no return statement: the result is whatever is in eax afterwards. */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{ _asm mov edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}
543 
/* Push EFLAGS and pop them straight back. This is the MSVC
   counterpart of the GCC padlock_reload_key(), whose comment explains
   that reloading EFLAGS forces a key reload. */
static void __fastcall
padlock_reload_key(void)
{ _asm pushfd _asm popfd }
547 
/* Heuristic key context tracing, MSVC flavour; 'cdata' arrives in
   ecx. If bit 30 of EFLAGS is set and the saved context differs from
   cdata, EFLAGS is reloaded with popfd; in every case cdata is then
   recorded in padlock_saved_context. */
static void __fastcall
padlock_verify_context(void *cdata)
{ _asm {
	pushfd
	bt DWORD PTR[esp],30
	jnc skip
	cmp ecx,padlock_saved_context
	je skip
	popfd
	sub esp,4	/* pre-compensate for the add at 'skip' */
	skip: add esp,4	/* drop the pushed flags */
	mov padlock_saved_context,ecx
	}
}
562 
/* Load supported features of the CPU to see if the PadLock is
   available (MSVC flavour). Sets padlock_use_ace / padlock_use_rng
   and leaves the number of detected features in eax, which serves as
   the return value (there is no return statement). */
static int
padlock_available(void)
{ _asm {
	/* can bit 21 of EFLAGS be toggled, i.e. is CPUID there? */
	pushfd
	pop eax
	mov ecx,eax
	xor eax,1<<21
	push eax
	popfd
	pushfd
	pop eax
	xor eax,ecx
	bt eax,21
	jnc noluck
	/* vendor string must read "CentaurHauls" */
	mov eax,0
	cpuid
	xor eax,eax
	cmp ebx,'tneC'
	jne noluck
	cmp edx,'Hrua'
	jne noluck
	cmp ecx,'slua'
	jne noluck
	/* extended leaf 0xC0000001 must be present */
	mov eax,0xC0000000
	cpuid
	mov edx,eax
	xor eax,eax
	cmp edx,0xC0000001
	jb noluck
	/* read the extended feature flags into edx */
	mov eax,0xC0000001
	cpuid
	xor eax,eax
	/* ACE needs bits 6 and 7 */
	bt edx,6
	jnc skip_a
	bt edx,7
	jnc skip_a
	mov padlock_use_ace,1
	inc eax
	/* RNG needs bits 2 and 3 */
	skip_a: bt edx,2
	jnc skip_r
	bt edx,3
	jnc skip_r
	mov padlock_use_rng,1
	inc eax
	skip_r:
	noluck:
	}
}
611 
/* Byte-swap 60 consecutive 32-bit words in place, starting at 'key'
   (received in ecx). EFLAGS is saved and restored around the loop
   because the direction flag is cleared for lodsd/stosd.
   NOTE(review): 60 is presumably the number of words in
   AES_KEY.rd_key - confirm against <openssl/aes.h>. */
static void __fastcall
padlock_bswapl(void *key)
{ _asm {
	pushfd
	cld
	mov esi,ecx
	mov edi,ecx
	mov ecx,60
	up: lodsd
	bswap eax
	stosd
	loop up
	popfd
	}
}
627 
628 /* MS actually specifies status of Direction Flag and compiler even
629  * manages to compile following as 'rep movsd' all by itself...
630  */
631 #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
632 #endif
633 
634 /* ===== AES encryption/decryption ===== */
635 #ifndef OPENSSL_NO_AES
636 
637 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
638 #define NID_aes_128_cfb NID_aes_128_cfb128
639 #endif
640 
641 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
642 #define NID_aes_128_ofb NID_aes_128_ofb128
643 #endif
644 
645 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
646 #define NID_aes_192_cfb NID_aes_192_cfb128
647 #endif
648 
649 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
650 #define NID_aes_192_ofb NID_aes_192_ofb128
651 #endif
652 
653 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
654 #define NID_aes_256_cfb NID_aes_256_cfb128
655 #endif
656 
657 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
658 #define NID_aes_256_ofb NID_aes_256_ofb128
659 #endif
660 
661 /* List of supported ciphers. */
662 static int padlock_cipher_nids[] = {
665  NID_aes_128_cfb,
666  NID_aes_128_ofb,
667 
670  NID_aes_192_cfb,
671  NID_aes_192_ofb,
672 
675  NID_aes_256_cfb,
676  NID_aes_256_ofb,
677 };
678 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
679  sizeof(padlock_cipher_nids[0]));
680 
681 /* Function prototypes ... */
682 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
683  const unsigned char *iv, int enc);
684 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
685  const unsigned char *in, size_t nbytes);
686 
/* Round 'ptr' up to the next 16-byte boundary; an already aligned
   pointer is returned unchanged. The result is an unsigned char *. */
#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
/* 16-byte aligned view of the per-context cipher data. The argument
   is parenthesised so that any pointer expression can be passed. */
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED((ctx)->cipher_data))
691 
/* Block size reported for each mode: a full AES block for ECB/CBC,
   1 for the byte-oriented OFB/CFB modes. */
#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB 1
#define EVP_CIPHER_block_size_CFB 1

/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
/* DECLARE_AES_EVP(ksize,lmode,umode) defines the EVP_CIPHER object
   padlock_aes_<ksize>_<lmode>. All of them share the same init and
   cipher callbacks. The context size includes 16 spare bytes so that
   ALIGNED_CIPHER_DATA() can round the pointer up to a 16-byte
   boundary.
   NOTE(review): the initializer is positional and presumably follows
   the member order of EVP_CIPHER - confirm against <openssl/evp.h>. */
#define DECLARE_AES_EVP(ksize,lmode,umode) \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
	NID_aes_##ksize##_##lmode, \
	EVP_CIPHER_block_size_##umode, \
	AES_KEY_SIZE_##ksize, \
	AES_BLOCK_SIZE, \
	0 | EVP_CIPH_##umode##_MODE, \
	padlock_aes_init_key, \
	padlock_aes_cipher, \
	NULL, \
	sizeof(struct padlock_cipher_data) + 16, \
	EVP_CIPHER_set_asn1_iv, \
	EVP_CIPHER_get_asn1_iv, \
	NULL, \
	NULL \
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
730 
731 static int
732 padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
733 {
734  /* No specific cipher => return a list of supported nids ... */
735  if (!cipher) {
736  *nids = padlock_cipher_nids;
737  return padlock_cipher_nids_num;
738  }
739 
740  /* ... or the requested "cipher" otherwise */
741  switch (nid) {
742  case NID_aes_128_ecb:
743  *cipher = &padlock_aes_128_ecb;
744  break;
745  case NID_aes_128_cbc:
746  *cipher = &padlock_aes_128_cbc;
747  break;
748  case NID_aes_128_cfb:
749  *cipher = &padlock_aes_128_cfb;
750  break;
751  case NID_aes_128_ofb:
752  *cipher = &padlock_aes_128_ofb;
753  break;
754 
755  case NID_aes_192_ecb:
756  *cipher = &padlock_aes_192_ecb;
757  break;
758  case NID_aes_192_cbc:
759  *cipher = &padlock_aes_192_cbc;
760  break;
761  case NID_aes_192_cfb:
762  *cipher = &padlock_aes_192_cfb;
763  break;
764  case NID_aes_192_ofb:
765  *cipher = &padlock_aes_192_ofb;
766  break;
767 
768  case NID_aes_256_ecb:
769  *cipher = &padlock_aes_256_ecb;
770  break;
771  case NID_aes_256_cbc:
772  *cipher = &padlock_aes_256_cbc;
773  break;
774  case NID_aes_256_cfb:
775  *cipher = &padlock_aes_256_cfb;
776  break;
777  case NID_aes_256_ofb:
778  *cipher = &padlock_aes_256_ofb;
779  break;
780 
781  default:
782  /* Sorry, we don't support this NID */
783  *cipher = NULL;
784  return 0;
785  }
786 
787  return 1;
788 }
789 
790 /* Prepare the encryption key for PadLock usage */
791 static int
792 padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
793  const unsigned char *iv, int enc)
794 {
795  struct padlock_cipher_data *cdata;
796  int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
797 
798  if (key==NULL) return 0; /* ERROR */
799 
800  cdata = ALIGNED_CIPHER_DATA(ctx);
801  memset(cdata, 0, sizeof(struct padlock_cipher_data));
802 
803  /* Prepare Control word. */
805  cdata->cword.b.encdec = 0;
806  else
807  cdata->cword.b.encdec = (ctx->encrypt == 0);
808  cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
809  cdata->cword.b.ksize = (key_len - 128) / 64;
810 
811  switch(key_len) {
812  case 128:
813  /* PadLock can generate an extended key for
814  AES128 in hardware */
815  memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
816  cdata->cword.b.keygen = 0;
817  break;
818 
819  case 192:
820  case 256:
821  /* Generate an extended AES key in software.
822  Needed for AES192/AES256 */
823  /* Well, the above applies to Stepping 8 CPUs
824  and is listed as hardware errata. They most
825  likely will fix it at some point and then
826  a check for stepping would be due here. */
829  enc)
830  AES_set_encrypt_key(key, key_len, &cdata->ks);
831  else
832  AES_set_decrypt_key(key, key_len, &cdata->ks);
833 #ifndef AES_ASM
834  /* OpenSSL C functions use byte-swapped extended key. */
835  padlock_bswapl(&cdata->ks);
836 #endif
837  cdata->cword.b.keygen = 1;
838  break;
839 
840  default:
841  /* ERROR */
842  return 0;
843  }
844 
845  /*
846  * This is done to cover for cases when user reuses the
847  * context for new key. The catch is that if we don't do
848  * this, padlock_eas_cipher might proceed with old key...
849  */
850  padlock_reload_key ();
851 
852  return 1;
853 }
854 
855 /*
856  * Simplified version of padlock_aes_cipher() used when
857  * 1) both input and output buffers are at aligned addresses.
858  * or when
859  * 2) running on a newer CPU that doesn't require aligned buffers.
860  */
861 static int
862 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
863  const unsigned char *in_arg, size_t nbytes)
864 {
865  struct padlock_cipher_data *cdata;
866  void *iv;
867 
868  cdata = ALIGNED_CIPHER_DATA(ctx);
869  padlock_verify_context(cdata);
870 
871  switch (EVP_CIPHER_CTX_mode(ctx)) {
872  case EVP_CIPH_ECB_MODE:
873  padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
874  break;
875 
876  case EVP_CIPH_CBC_MODE:
877  memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
878  iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
879  memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
880  break;
881 
882  case EVP_CIPH_CFB_MODE:
883  memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
884  iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
885  memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
886  break;
887 
888  case EVP_CIPH_OFB_MODE:
889  memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
890  padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
891  memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
892  break;
893 
894  default:
895  return 0;
896  }
897 
898  memset(cdata->iv, 0, AES_BLOCK_SIZE);
899 
900  return 1;
901 }
902 
/* Size in bytes of the pieces in which padlock_aes_cipher() processes
   misaligned data. May be overridden at build time; the check below
   rejects values that are smaller than 16 or not a power of 2. */
#ifndef PADLOCK_CHUNK
# define PADLOCK_CHUNK 512 /* Must be a power of 2, at least 16 */
#endif
#if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
# error "insane PADLOCK_CHUNK..."
#endif
909 
910 /* Re-align the arguments to 16-Bytes boundaries and run the
911  encryption function itself. This function is not AES-specific. */
912 static int
913 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
914  const unsigned char *in_arg, size_t nbytes)
915 {
916  struct padlock_cipher_data *cdata;
917  const void *inp;
918  unsigned char *out;
919  void *iv;
920  int inp_misaligned, out_misaligned, realign_in_loop;
921  size_t chunk, allocated=0;
922 
923  /* ctx->num is maintained in byte-oriented modes,
924  such as CFB and OFB... */
925  if ((chunk = ctx->num)) { /* borrow chunk variable */
926  unsigned char *ivp=ctx->iv;
927 
928  switch (EVP_CIPHER_CTX_mode(ctx)) {
929  case EVP_CIPH_CFB_MODE:
930  if (chunk >= AES_BLOCK_SIZE)
931  return 0; /* bogus value */
932 
933  if (ctx->encrypt)
934  while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
935  ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
936  chunk++, nbytes--;
937  }
938  else while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
939  unsigned char c = *(in_arg++);
940  *(out_arg++) = c ^ ivp[chunk];
941  ivp[chunk++] = c, nbytes--;
942  }
943 
944  ctx->num = chunk%AES_BLOCK_SIZE;
945  break;
946  case EVP_CIPH_OFB_MODE:
947  if (chunk >= AES_BLOCK_SIZE)
948  return 0; /* bogus value */
949 
950  while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
951  *(out_arg++) = *(in_arg++) ^ ivp[chunk];
952  chunk++, nbytes--;
953  }
954 
955  ctx->num = chunk%AES_BLOCK_SIZE;
956  break;
957  }
958  }
959 
960  if (nbytes == 0)
961  return 1;
962 #if 0
963  if (nbytes % AES_BLOCK_SIZE)
964  return 0; /* are we expected to do tail processing? */
965 #else
966  /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
967  modes and arbitrary value in byte-oriented modes, such as
968  CFB and OFB... */
969 #endif
970 
971  /* VIA promises CPUs that won't require alignment in the future.
972  For now padlock_aes_align_required is initialized to 1 and
973  the condition is never met... */
974  /* C7 core is capable to manage unaligned input in non-ECB[!]
975  mode, but performance penalties appear to be approximately
976  same as for software alignment below or ~3x. They promise to
977  improve it in the future, but for now we can just as well
978  pretend that it can only handle aligned input... */
979  if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
980  return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
981 
982  inp_misaligned = (((size_t)in_arg) & 0x0F);
983  out_misaligned = (((size_t)out_arg) & 0x0F);
984 
985  /* Note that even if output is aligned and input not,
986  * I still prefer to loop instead of copy the whole
987  * input and then encrypt in one stroke. This is done
988  * in order to improve L1 cache utilization... */
989  realign_in_loop = out_misaligned|inp_misaligned;
990 
991  if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
992  return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
993 
994  /* this takes one "if" out of the loops */
995  chunk = nbytes;
996  chunk %= PADLOCK_CHUNK;
997  if (chunk==0) chunk = PADLOCK_CHUNK;
998 
999  if (out_misaligned) {
		/* optimize for small input */
1001  allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
1002  out = alloca(0x10 + allocated);
1003  out = NEAREST_ALIGNED(out);
1004  }
1005  else
1006  out = out_arg;
1007 
1008  cdata = ALIGNED_CIPHER_DATA(ctx);
1009  padlock_verify_context(cdata);
1010 
1011  switch (EVP_CIPHER_CTX_mode(ctx)) {
1012  case EVP_CIPH_ECB_MODE:
1013  do {
1014  if (inp_misaligned)
1015  inp = padlock_memcpy(out, in_arg, chunk);
1016  else
1017  inp = in_arg;
1018  in_arg += chunk;
1019 
1020  padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1021 
1022  if (out_misaligned)
1023  out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1024  else
1025  out = out_arg+=chunk;
1026 
1027  nbytes -= chunk;
1028  chunk = PADLOCK_CHUNK;
1029  } while (nbytes);
1030  break;
1031 
1032  case EVP_CIPH_CBC_MODE:
1033  memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1034  goto cbc_shortcut;
1035  do {
1036  if (iv != cdata->iv)
1037  memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1038  chunk = PADLOCK_CHUNK;
1039  cbc_shortcut: /* optimize for small input */
1040  if (inp_misaligned)
1041  inp = padlock_memcpy(out, in_arg, chunk);
1042  else
1043  inp = in_arg;
1044  in_arg += chunk;
1045 
1046  iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1047 
1048  if (out_misaligned)
1049  out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1050  else
1051  out = out_arg+=chunk;
1052 
1053  } while (nbytes -= chunk);
1054  memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1055  break;
1056 
1057  case EVP_CIPH_CFB_MODE:
1058  memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1059  chunk &= ~(AES_BLOCK_SIZE-1);
1060  if (chunk) goto cfb_shortcut;
1061  else goto cfb_skiploop;
1062  do {
1063  if (iv != cdata->iv)
1064  memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1065  chunk = PADLOCK_CHUNK;
1066  cfb_shortcut: /* optimize for small input */
1067  if (inp_misaligned)
1068  inp = padlock_memcpy(out, in_arg, chunk);
1069  else
1070  inp = in_arg;
1071  in_arg += chunk;
1072 
1073  iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1074 
1075  if (out_misaligned)
1076  out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1077  else
1078  out = out_arg+=chunk;
1079 
1080  nbytes -= chunk;
1081  } while (nbytes >= AES_BLOCK_SIZE);
1082 
1083  cfb_skiploop:
1084  if (nbytes) {
1085  unsigned char *ivp = cdata->iv;
1086 
1087  if (iv != ivp) {
1088  memcpy(ivp, iv, AES_BLOCK_SIZE);
1089  iv = ivp;
1090  }
1091  ctx->num = nbytes;
1092  if (cdata->cword.b.encdec) {
1093  cdata->cword.b.encdec=0;
1094  padlock_reload_key();
1095  padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1096  cdata->cword.b.encdec=1;
1097  padlock_reload_key();
1098  while(nbytes) {
1099  unsigned char c = *(in_arg++);
1100  *(out_arg++) = c ^ *ivp;
1101  *(ivp++) = c, nbytes--;
1102  }
1103  }
1104  else { padlock_reload_key();
1105  padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1106  padlock_reload_key();
1107  while (nbytes) {
1108  *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1109  ivp++, nbytes--;
1110  }
1111  }
1112  }
1113 
1114  memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1115  break;
1116 
1117  case EVP_CIPH_OFB_MODE:
1118  memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1119  chunk &= ~(AES_BLOCK_SIZE-1);
1120  if (chunk) do {
1121  if (inp_misaligned)
1122  inp = padlock_memcpy(out, in_arg, chunk);
1123  else
1124  inp = in_arg;
1125  in_arg += chunk;
1126 
1127  padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1128 
1129  if (out_misaligned)
1130  out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1131  else
1132  out = out_arg+=chunk;
1133 
1134  nbytes -= chunk;
1135  chunk = PADLOCK_CHUNK;
1136  } while (nbytes >= AES_BLOCK_SIZE);
1137 
1138  if (nbytes) {
1139  unsigned char *ivp = cdata->iv;
1140 
1141  ctx->num = nbytes;
1142  padlock_reload_key(); /* empirically found */
1143  padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1144  padlock_reload_key(); /* empirically found */
1145  while (nbytes) {
1146  *(out_arg++) = *(in_arg++) ^ *ivp;
1147  ivp++, nbytes--;
1148  }
1149  }
1150 
1151  memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
1152  break;
1153 
1154  default:
1155  return 0;
1156  }
1157 
1158  /* Clean the realign buffer if it was used */
1159  if (out_misaligned) {
1160  volatile unsigned long *p=(void *)out;
1161  size_t n = allocated/sizeof(*p);
1162  while (n--) *p++=0;
1163  }
1164 
1165  memset(cdata->iv, 0, AES_BLOCK_SIZE);
1166 
1167  return 1;
1168 }
1169 
1170 #endif /* OPENSSL_NO_AES */
1171 
1172 /* ===== Random Number Generator ===== */
1173 /*
1174  * This code is not engaged. The reason is that it does not comply
1175  * with recommendations for VIA RNG usage for secure applications
1176  * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1177  * provide meaningful error control...
1178  */
/*
 * Adapter between the RAND_METHOD byte-generation callbacks and the raw
 * PadLock RNG as reached through padlock_xstore().
 *
 * output - destination buffer; receives `count` bytes on success
 * count  - number of bytes requested; a value <= 0 stores nothing
 *
 * Returns 1 once every requested byte has been written, or 0 as soon as a
 * status word returned by padlock_xstore() fails one of the checks below.
 * A status reporting zero bytes is not an error: the same request is simply
 * issued again, with no upper bound on the number of retries.
 */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int status;	/* word returned by padlock_xstore() */
	unsigned int scratch;	/* landing area for the one-byte requests */

	/* Bulk phase: store straight into the caller's buffer, 8 bytes a go. */
	while (count >= 8) {
		status = padlock_xstore(output, 0);

		if ((status & (1<<6)) == 0)
			return 0;	/* RNG disabled */
		/* bits 10..14 cover DC bias, Raw Bits and String Filter */
		if ((status & (0x1F<<10)) != 0)
			return 0;

		/* The low five bits are checked as the number of bytes stored. */
		switch (status & 0x1F) {
		case 0:
			continue;	/* no data, retry... */
		case 8:
			break;		/* full 8-byte store, accept it */
		default:
			return 0;	/* fatal failure... */
		}

		output += 8;
		count -= 8;
	}

	/* Tail phase: fewer than 8 bytes left, fetch them one at a time. */
	while (count > 0) {
		status = padlock_xstore(&scratch, 3);

		if ((status & (1<<6)) == 0)
			return 0;	/* RNG disabled */
		/* bits 10..14 cover DC bias, Raw Bits and String Filter */
		if ((status & (0x1F<<10)) != 0)
			return 0;

		switch (status & 0x1F) {
		case 0:
			continue;	/* no data, retry... */
		case 1:
			break;		/* exactly one byte, accept it */
		default:
			return 0;	/* fatal failure... */
		}

		/* Only the low-order byte of the scratch word is handed out. */
		*output = (unsigned char)scratch;
		output++;
		count--;
	}

	/*
	 * Clear the scratch word on the success path; the store goes through
	 * a volatile-qualified lvalue, presumably so that the compiler does
	 * not discard it as a dead write.
	 */
	*(volatile unsigned int *)&scratch = 0;

	return 1;
}
1210 
/*
 * Status callback for the PadLock RAND_METHOD table.  It performs no
 * check of any kind and unconditionally returns 1; it exists only so
 * that the "rand status" slot of the table has something to point at.
 */
static int padlock_rand_status(void)
{
	return 1;
}
1217 
1218 /* Prepare structure for registration */
1219 static RAND_METHOD padlock_rand = {
1220  NULL, /* seed */
1221  padlock_rand_bytes, /* bytes */
1222  NULL, /* cleanup */
1223  NULL, /* add */
1224  padlock_rand_bytes, /* pseudorand */
1225  padlock_rand_status, /* rand status */
1226 };
1227 
1228 #else /* !COMPILE_HW_PADLOCK */
1229 #ifndef OPENSSL_NO_DYNAMIC_ENGINE
1231 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
1233 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
1235 #endif
1236 #endif /* COMPILE_HW_PADLOCK */
1237 
1238 #endif /* !OPENSSL_NO_HW_PADLOCK */
1239 #endif /* !OPENSSL_NO_HW */