cryptlib  3.4.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Macros
aesopt.h
Go to the documentation of this file.
1 /*
2  ---------------------------------------------------------------------------
3  Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
4 
5  LICENSE TERMS
6 
7  The free distribution and use of this software is allowed (with or without
8  changes) provided that:
9 
10  1. source code distributions include the above copyright notice, this
11  list of conditions and the following disclaimer;
12 
13  2. binary distributions include the above copyright notice, this list
14  of conditions and the following disclaimer in their documentation;
15 
16  3. the name of the copyright holder is not used to endorse products
17  built using this software without specific written permission.
18 
19  DISCLAIMER
20 
21  This software is provided 'as is' with no explicit or implied warranties
22  in respect of its properties, including, but not limited to, correctness
23  and/or fitness for purpose.
24  ---------------------------------------------------------------------------
25  Issue Date: 20/12/2007
26 
27  This file contains the compilation options for AES (Rijndael) and code
28  that is common across encryption, key scheduling and table generation.
29 
30  OPERATION
31 
32  These source code files implement the AES algorithm Rijndael designed by
33  Joan Daemen and Vincent Rijmen. This version is designed for the standard
34  block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
35  and 32 bytes).
36 
37  This version is designed for flexibility and speed using operations on
38  32-bit words rather than operations on bytes. It can be compiled with
39  either big or little endian internal byte order but is faster when the
40  native byte order for the processor is used.
41 
42  THE CIPHER INTERFACE
43 
44  The cipher interface is implemented as an array of bytes in which lower
45  AES bit sequence indexes map to higher numeric significance within bytes.
46 
47  uint_8t (an unsigned 8-bit type)
48  uint_32t (an unsigned 32-bit type)
49  struct aes_encrypt_ctx (structure for the cipher encryption context)
50  struct aes_decrypt_ctx (structure for the cipher decryption context)
51  AES_RETURN the function return type
52 
53  C subroutine calls:
54 
55  AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
56  AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
57  AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
58  AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
59  const aes_encrypt_ctx cx[1]);
60 
61  AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
62  AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
63  AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
64  AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
65  const aes_decrypt_ctx cx[1]);
66 
67  IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
68  you call aes_init() before AES is used so that the tables are initialised.
69 
70  C++ aes class subroutines:
71 
72  Class AESencrypt for encryption
73 
74  Constructors:
75  AESencrypt(void)
76  AESencrypt(const unsigned char *key) - 128 bit key
77  Members:
78  AES_RETURN key128(const unsigned char *key)
79  AES_RETURN key192(const unsigned char *key)
80  AES_RETURN key256(const unsigned char *key)
81  AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const
82 
83  Class AESdecrypt for decryption
84  Constructors:
85  AESdecrypt(void)
86  AESdecrypt(const unsigned char *key) - 128 bit key
87  Members:
88  AES_RETURN key128(const unsigned char *key)
89  AES_RETURN key192(const unsigned char *key)
90  AES_RETURN key256(const unsigned char *key)
91  AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const
92 */
93 
94 #if !defined( _AESOPT_H )
95 #define _AESOPT_H
96 
97 /* Cryptlib options: Enable use of asm encryption with C key schedule (the
98  fastest combination) and use VIA ACE if possible, but not under WinCE,
99  both because it's possible that we could be running on an oddball
100  embedded x86 that doesn't understand the necessary opcodes, and because
101  eVC++ doesn't understand some of the extensions used in the ACE-support
102  code - pcg */
103 
/* Turn on run-time VIA ACE detection for 32-bit x86 (_M_IX86) builds with
   VC++ where _MSC_VER > 800, unless _WIN32_WCE or NO_ASM is defined. The
   inner guard keeps a user-supplied definition from being redefined. */
104 #if defined( _MSC_VER ) && ( _MSC_VER > 800 ) && \
105  defined( _M_IX86 ) && \
106  !( defined( _WIN32_WCE ) || defined( NO_ASM ) )
107  #ifndef USE_VIA_ACE_IF_PRESENT
108  #define USE_VIA_ACE_IF_PRESENT
109  #endif
110 #endif /* VC++ on x86 under Win32 - pcg */
111 
/* Pick the assembler variant for VC++ builds (_MSC_VER > 800, not WinCE and
   NO_ASM not already defined):
     _M_X64  - nothing is selected (the ASM_AMD64_C define is commented out)
     _M_IX86 - ASM_X86_V2C
     other   - NO_ASM is defined */
112 #if defined( _MSC_VER ) && ( _MSC_VER > 800 ) && \
113  !( defined( _WIN32_WCE ) || defined( NO_ASM ) )
114  /* The apparently redundant guards are necessary in case users manually
115  define the values to enable various asm options */
116  #if defined( _M_X64 )
117  #ifndef ASM_AMD64_C
118 /* #define ASM_AMD64_C - Currently not used */
119  #endif /* ASM_AMD64_C */
120  #elif defined( _M_IX86 )
121  #ifndef ASM_X86_V2C
122  #define ASM_X86_V2C
123  #endif /* ASM_X86_V2C */
124  #else
125  #define NO_ASM
126  #endif /* Different x86 architectures */
127 #endif /* VC++ on x86 under Win32 - pcg */
128 
129 #if defined( INC_ALL )
130  #include "aes.h"
131 #else
132  #include "crypt/aes.h"
133 #endif /* Compiler-specific includes - pcg */
134 
135 /* PLATFORM SPECIFIC INCLUDES */
136 
137 #if defined( INC_ALL )
138  #include "brg_endian.h"
139 #else
140  #include "crypt/brg_endian.h"
141 #endif /* Compiler-specific includes - pcg */
142 
143 /* CONFIGURATION - THE USE OF DEFINES
144 
145  Later in this section there are a number of defines that control the
146  operation of the code. In each section, the purpose of each define is
147  explained so that the relevant form can be included or excluded by
148  setting either 1's or 0's respectively on the branches of the related
149  #if clauses. The following local defines should not be changed.
150 */
151 
/* Bit flags that are OR-ed together into EFUNCS_IN_C, DFUNCS_IN_C and
   FUNCS_IN_C to record which operations are implemented in C */
152 #define ENCRYPTION_IN_C 1
153 #define DECRYPTION_IN_C 2
154 #define ENC_KEYING_IN_C 4
155 #define DEC_KEYING_IN_C 8
156 
/* Values accepted by the ENC_ROUND, LAST_ENC_ROUND, DEC_ROUND,
   LAST_DEC_ROUND and KEY_SCHED options */
157 #define NO_TABLES 0
158 #define ONE_TABLE 1
159 #define FOUR_TABLES 4
/* Values accepted by ENC_UNROLL and DEC_UNROLL */
160 #define NONE 0
161 #define PARTIAL 1
162 #define FULL 2
163 
164 /* --- START OF USER CONFIGURED OPTIONS --- */
165 
166 /* 1. BYTE ORDER WITHIN 32 BIT WORDS
167 
168  The fundamental data processing units in Rijndael are 8-bit bytes. The
169  input, output and key input are all enumerated arrays of bytes in which
170  bytes are numbered starting at zero and increasing to one less than the
171  number of bytes in the array in question. This enumeration is only used
172  for naming bytes and does not imply any adjacency or order relationship
173  from one byte to another. When these inputs and outputs are considered
174  as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
175  byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
176  In this implementation bits are numbered from 0 to 7 starting at the
177  numerically least significant end of each byte (bit n represents 2^n).
178 
179  However, Rijndael can be implemented more efficiently using 32-bit
180  words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
181  into word[n]. While in principle these bytes can be assembled into words
182  in any positions, this implementation only supports the two formats in
183  which bytes in adjacent positions within words also have adjacent byte
184  numbers. This order is called big-endian if the lowest numbered bytes
185  in words have the highest numeric significance and little-endian if the
186  opposite applies.
187 
188  This code can work in either order irrespective of the order used by the
189  machine on which it runs. Normally the internal byte order will be set
190  to the order of the processor on which the code is to be run but this
191  define can be used to reverse this in special situations
192 
193  WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
194  This define will hence be redefined later (in section 4) if necessary
195 */
196 
/* Only the first branch is enabled, so the algorithm byte order follows
   PLATFORM_BYTE_ORDER. To force a fixed order, change the first test to 0
   and the test on the wanted '#elif' branch to 1; if every test is 0 the
   build stops with the #error below. */
197 #if 1
198 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
199 #elif 0
200 #define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
201 #elif 0
202 #define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
203 #else
204 #error The algorithm byte order is not defined
205 #endif
206 
207 /* 2. VIA ACE SUPPORT
208 
209  Define this option if support for the VIA ACE is required. This uses
210  inline assembler instructions and is only implemented for the Microsoft,
211  Intel and GCC compilers. If VIA ACE is known to be present, then defining
212  ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
213  code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
214  it is detected (both present and enabled) but the normal AES code will
215  also be present.
216 
217  When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
218  aligned; other input/output buffers do not need to be 16 byte aligned
219  but there are very large performance gains if this can be arranged.
220  VIA ACE also requires the decryption key schedule to be in reverse
221  order (which later checks below ensure).
222 */
223 
/* Both switches below are off ('#if 0'), so neither option is defined at
   this point; either may already have been defined earlier in this file or
   by the build. Change a '0' to '1' to enable the option here. */
224 #if 0 && !defined( USE_VIA_ACE_IF_PRESENT )
225 # define USE_VIA_ACE_IF_PRESENT
226 #endif
227 
228 #if 0 && !defined( ASSUME_VIA_ACE_PRESENT )
229 # define ASSUME_VIA_ACE_PRESENT
230 # endif
231 
/* Withdraw any VIA ACE request when _WIN64 or _WIN32_WCE is defined, or
   when building with VC++ where _MSC_VER <= 800 */
232 #if defined ( _WIN64 ) || defined( _WIN32_WCE ) || \
233  defined( _MSC_VER ) && ( _MSC_VER <= 800 )
234 # if defined( USE_VIA_ACE_IF_PRESENT )
235 # undef USE_VIA_ACE_IF_PRESENT
236 # endif
237 # if defined( ASSUME_VIA_ACE_PRESENT )
238 # undef ASSUME_VIA_ACE_PRESENT
239 # endif
240 #endif
241 
242 /* 3. ASSEMBLER SUPPORT
243 
244  This define (which can be on the command line) enables the use of the
245  assembler code routines for encryption, decryption and key scheduling
246  as follows:
247 
248  ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
249  encryption and decryption but with key scheduling in C
250  ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for
251  encryption, decryption and key scheduling
252  ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
253  encryption and decryption but with key scheduling in C
254  ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
255  encryption and decryption but with key scheduling in C
256 
257  Change one 'if 0' below to 'if 1' to select the version or define
258  as a compilation option.
259 */
260 
/* Every branch below is disabled ('0 && ...'), so no assembler variant is
   selected here unless one of the '0's is changed to '1'. A variant may
   still have been defined earlier in this file or by the build. */
261 #if 0 && !defined( ASM_X86_V1C )
262 # define ASM_X86_V1C
263 #elif 0 && !defined( ASM_X86_V2 )
264 # define ASM_X86_V2
265 #elif 0 && !defined( ASM_X86_V2C )
266 # define ASM_X86_V2C
267 #elif 0 && !defined( ASM_AMD64_C )
268 # define ASM_AMD64_C
269 #endif
270 
/* Stop the build if an x86 assembler option is set while _M_IX86 is not
   defined, or if ASM_AMD64_C is set while _M_X64 is not defined. The test
   relies on '&&' binding more tightly than '||'. */
271 #if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \
272  && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 )
273 # error Assembler code is only available for x86 and AMD64 systems
274 #endif
275 
276 /* 4. FAST INPUT/OUTPUT OPERATIONS.
277 
278  On some machines it is possible to improve speed by transferring the
279  bytes in the input and output arrays to and from the internal 32-bit
280  variables by addressing these arrays as if they are arrays of 32-bit
281  words. On some machines this will always be possible but there may
282  be a large performance penalty if the byte arrays are not aligned on
283  the normal word boundaries. On other machines this technique will
284  lead to memory access errors when such 32-bit word accesses are not
285  properly aligned. The option SAFE_IO avoids such problems but will
286  often be slower on those machines that support misaligned access
287  (especially so if care is taken to align the input and output byte
288  arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
289  assumed that access to byte arrays as if they are arrays of 32-bit
290  words will not cause problems when such accesses are misaligned.
291 */
/* SAFE_IO is defined here for every compiler that does not define _MSC_VER;
   change the '1' to '0' to leave it undefined everywhere. */
292 #if 1 && !defined( _MSC_VER )
293 #define SAFE_IO
294 #endif
295 
296 /* 5. LOOP UNROLLING
297 
298  The code for encryption and decryption cycles through a number of rounds
299  that can be implemented either in a loop or by expanding the code into a
300  long sequence of instructions, the latter producing a larger program but
301  one that will often be much faster. The latter is called loop unrolling.
302  There are also potential speed advantages in expanding two iterations in
303  a loop with half the number of iterations, which is called partial loop
304  unrolling. The following options allow partial or full loop unrolling
305  to be set independently for encryption and decryption
306 */
/* Encryption: the first branch is enabled, so FULL unrolling is selected
   here (a later check resets this to NONE when ENC_ROUND is NO_TABLES). */
307 #if 1
308 #define ENC_UNROLL FULL
309 #elif 0
310 #define ENC_UNROLL PARTIAL
311 #else
312 #define ENC_UNROLL NONE
313 #endif
314 
/* Decryption: likewise FULL here (reset to NONE later when DEC_ROUND is
   NO_TABLES). */
315 #if 1
316 #define DEC_UNROLL FULL
317 #elif 0
318 #define DEC_UNROLL PARTIAL
319 #else
320 #define DEC_UNROLL NONE
321 #endif
322 
323 /* 6. FAST FINITE FIELD OPERATIONS
324 
325  If this section is included, tables are used to provide faster finite
326  field arithmetic (this has no effect if FIXED_TABLES is defined).
327 */
328 #if 1 /* enabled: FF_TABLES is defined; set to 0 to leave it undefined */
329 #define FF_TABLES
330 #endif
331 
332 /* 7. INTERNAL STATE VARIABLE FORMAT
333 
334  The internal state of Rijndael is stored in a number of local 32-bit
335  word variables which can be defined either as an array or as individual
336  named variables. Include this section if you want to store these local
337  variables in arrays. Otherwise individual local variables will be used.
338 */
339 #if 1 /* enabled: ARRAYS is defined, so s(x,c) later expands to x[c] */
340 #define ARRAYS
341 #endif
342 
343 /* 8. FIXED OR DYNAMIC TABLES
344 
345  When this section is included the tables used by the code are compiled
346  statically into the binary file. Otherwise the subroutine aes_init()
347  must be called to compute them before the code is first used.
348 */
/* FIXED_TABLES is defined here except when building with VC++ where
   _MSC_VER <= 800 */
349 #if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
350 #define FIXED_TABLES
351 #endif
352 
353 /* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
354 
355  In some systems it is better to mask to create a byte value rather than
356  casting
357 */
/* to_byte(x): reduce x to a byte value. The masking form is the active one;
   change the '0' to '1' to use the cast form instead. */
358 #if 0
359 # define to_byte(x) ((uint_8t)(x))
360 #else
361 # define to_byte(x) ((x) & 0xff)
362 #endif
363 
364 /* 10. TABLE ALIGNMENT
365 
366  On some systems speed will be improved by aligning the AES large lookup
367  tables on particular boundaries. This define should be set to a power of
368  two giving the desired alignment. It can be left undefined if alignment
369  is not needed. This option is specific to the Microsoft VC++ compiler -
370  it seems to sometimes cause trouble for the VC++ version 6 compiler.
371 */
372 
/* An alignment of 32 is requested only for VC++ where _MSC_VER >= 1300;
   TABLE_ALIGN stays undefined for every other compiler. */
373 #if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
374 #define TABLE_ALIGN 32
375 #endif
376 
377 /* 11. TABLE OPTIONS
378 
379  This cipher proceeds by repeating in a number of cycles known as 'rounds'
380  which are implemented by a round function which can optionally be speeded
381  up using tables. The basic tables are each 256 32-bit words, with either
382  one or four tables being required for each round function depending on
383  how much speed is required. The encryption and decryption round functions
384  are different and the last encryption and decryption round functions are
385  different again making four different round functions in all.
386 
387  This means that:
388  1. Normal encryption and decryption rounds can each use either 0, 1
389  or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
390  2. The last encryption and decryption rounds can also use either 0, 1
391  or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
392 
393  Include or exclude the appropriate definitions below to set the number
394  of tables used by this implementation.
395 */
396 
/* All four selections below have their first branch enabled and therefore
   choose FOUR_TABLES. To pick ONE_TABLE change the leading '#if 1' to
   '#if 0' and the following '#elif 0' to '#elif 1'; with both tests at 0
   the '#else' branch selects NO_TABLES. Later checks may lower the
   last-round settings so that they never exceed the normal-round ones. */
397 #if 1 /* set tables for the normal encryption round */
398 #define ENC_ROUND FOUR_TABLES
399 #elif 0
400 #define ENC_ROUND ONE_TABLE
401 #else
402 #define ENC_ROUND NO_TABLES
403 #endif
404 
405 #if 1 /* set tables for the last encryption round */
406 #define LAST_ENC_ROUND FOUR_TABLES
407 #elif 0
408 #define LAST_ENC_ROUND ONE_TABLE
409 #else
410 #define LAST_ENC_ROUND NO_TABLES
411 #endif
412 
413 #if 1 /* set tables for the normal decryption round */
414 #define DEC_ROUND FOUR_TABLES
415 #elif 0
416 #define DEC_ROUND ONE_TABLE
417 #else
418 #define DEC_ROUND NO_TABLES
419 #endif
420 
421 #if 1 /* set tables for the last decryption round */
422 #define LAST_DEC_ROUND FOUR_TABLES
423 #elif 0
424 #define LAST_DEC_ROUND ONE_TABLE
425 #else
426 #define LAST_DEC_ROUND NO_TABLES
427 #endif
428 
429 /* The decryption key schedule can be speeded up with tables in the same
430  way that the round functions can. Include or exclude the following
431  defines to set this requirement.
432 */
/* First branch enabled: the key schedule uses FOUR_TABLES. The remaining
   branches select ONE_TABLE or NO_TABLES in the same way as the round
   options. */
433 #if 1
434 #define KEY_SCHED FOUR_TABLES
435 #elif 0
436 #define KEY_SCHED ONE_TABLE
437 #else
438 #define KEY_SCHED NO_TABLES
439 #endif
440 
441 /* ---- END OF USER CONFIGURED OPTIONS ---- */
442 
443 /* VIA ACE support is only available for VC++ and GCC */
444 
/* Neither _MSC_VER nor __GNUC__ defined: withdraw both VIA ACE options */
445 #if !defined( _MSC_VER ) && !defined( __GNUC__ )
446 # if defined( ASSUME_VIA_ACE_PRESENT )
447 # undef ASSUME_VIA_ACE_PRESENT
448 # endif
449 # if defined( USE_VIA_ACE_IF_PRESENT )
450 # undef USE_VIA_ACE_IF_PRESENT
451 # endif
452 #endif
453 
/* ASSUME_VIA_ACE_PRESENT implies USE_VIA_ACE_IF_PRESENT */
454 #if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
455 #define USE_VIA_ACE_IF_PRESENT
456 #endif
457 
/* Any use of VIA ACE turns on AES_REV_DKS (presumably the reversed
   decryption key schedule that section 2 says VIA ACE requires - confirm
   against the key schedule code) */
458 #if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
459 #define AES_REV_DKS
460 #endif
461 
462 /* Assembler support requires the use of platform byte order */
463 
/* Force the algorithm byte order back to the platform order when one of
   ASM_X86_V1C, ASM_X86_V2C or ASM_AMD64_C is in use and section 1 chose a
   different order. NOTE(review): ASM_X86_V2 is not part of this test -
   confirm whether that is intentional. */
464 #if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \
465  && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
466 #undef ALGORITHM_BYTE_ORDER
467 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
468 #endif
469 
470 /* In this implementation the columns of the state array are each held in
471  32-bit words. The state array can be held in various ways: in an array
472  of words, in a number of individual word variables or in a number of
473  processor registers. The following define maps a variable name x and
474  a column number c to the way the state array variable is to be held.
475  The first define below maps the state into an array x[c] whereas the
476  second form maps the state into a number of individual variables x0,
477  x1, etc. Another form could map individual state columns to machine
478  register names.
479 */
480 
/* s(x,c): name column c of state variable x. With ARRAYS defined this is the
   array element x[c], e.g. s(b,2) -> b[2]; otherwise the two arguments are
   pasted into a single identifier, e.g. s(b,2) -> b2, so c must then be a
   literal token. */
481 #if defined( ARRAYS )
482 #define s(x,c) x[c]
483 #else
484 #define s(x,c) x##c
485 #endif
486 
487 /* This implementation provides subroutines for encryption, decryption
488  and for setting the three key lengths (separately) for encryption
489  and decryption. Since not all functions are needed, masks are set
490  up here to determine which will be implemented in C
491 */
492 
/* EFUNCS_IN_C: which encryption-side operations are implemented in C.
     AES_ENCRYPT not defined                      -> none
     ASSUME_VIA_ACE_PRESENT, ASM_X86_V1C,
       ASM_X86_V2C or ASM_AMD64_C defined         -> key schedule only
     otherwise, ASM_X86_V2 not defined            -> encryption and key schedule
     otherwise (ASM_X86_V2 defined)               -> none */
493 #if !defined( AES_ENCRYPT )
494 # define EFUNCS_IN_C 0
495 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
496  || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
497 # define EFUNCS_IN_C ENC_KEYING_IN_C
498 #elif !defined( ASM_X86_V2 )
499 # define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
500 #else
501 # define EFUNCS_IN_C 0
502 #endif
503 
/* DFUNCS_IN_C: the same decision for the decryption side, keyed on
   AES_DECRYPT */
504 #if !defined( AES_DECRYPT )
505 # define DFUNCS_IN_C 0
506 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
507  || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
508 # define DFUNCS_IN_C DEC_KEYING_IN_C
509 #elif !defined( ASM_X86_V2 )
510 # define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
511 #else
512 # define DFUNCS_IN_C 0
513 #endif
514 
/* Combined mask, tested with '&' against the *_IN_C flags further down */
515 #define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C )
516 
517 /* END OF CONFIGURATION OPTIONS */
518 
/* RC_LENGTH evaluates to 5 * (AES_BLOCK_SIZE / 4 - 2). AES_BLOCK_SIZE is not
   defined in this file (presumably it comes from aes.h - confirm); with the
   16-byte block size described in the file header the result is 10. */
519 #define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
520 
521 /* Disable or report errors on some combinations of options */
522 
/* The last encryption round may not use more tables than the normal round:
   it is lowered to NO_TABLES or ONE_TABLE to match ENC_ROUND when needed. */
523 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
524 #undef LAST_ENC_ROUND
525 #define LAST_ENC_ROUND NO_TABLES
526 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
527 #undef LAST_ENC_ROUND
528 #define LAST_ENC_ROUND ONE_TABLE
529 #endif
530 
/* Without round tables, encryption unrolling is switched off */
531 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
532 #undef ENC_UNROLL
533 #define ENC_UNROLL NONE
534 #endif
535 
/* Same two adjustments for decryption */
536 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
537 #undef LAST_DEC_ROUND
538 #define LAST_DEC_ROUND NO_TABLES
539 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
540 #undef LAST_DEC_ROUND
541 #define LAST_DEC_ROUND ONE_TABLE
542 #endif
543 
544 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
545 #undef DEC_UNROLL
546 #define DEC_UNROLL NONE
547 #endif
548 
/* aes_sw32(x): reverse the byte order of a 32-bit word. If a bswap32 or
   bswap_32 macro is already defined it is used directly; otherwise the swap
   is built from two rotations and two masks.

   brot(x,n): rotate the 32-bit value x left by n bits. n must be in the
   range 1..31 (a count of 0 or 32 would shift by the full word width, which
   is undefined). n is parenthesised so that an expression argument keeps
   its meaning inside '32 - (n)'.

   Both fallback macros evaluate x more than once, so x must not have side
   effects. */
549 #if defined( bswap32 )
550 #define aes_sw32 bswap32
551 #elif defined( bswap_32 )
552 #define aes_sw32 bswap_32
553 #else
554 #define brot(x,n) (((uint_32t)(x) << (n)) | ((uint_32t)(x) >> (32 - (n))))
555 #define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
556 #endif
557 
558 /* upr(x,n): rotates bytes within words by n positions, moving bytes to
559  higher index positions with wrap around into low positions
560  ups(x,n): moves bytes by n positions to higher index positions in
561  words but without wrap around
562  bval(x,n): extracts a byte from a word
563 
564  WARNING: The definitions given here are intended only for use with
565  unsigned variables and with shift counts that are compile
566  time constants
567 */
568 
/* Little-endian internal order: byte n of a word occupies bits 8n..8n+7,
   so b0 is the least significant byte in bytes2word and "higher index"
   means a left shift. */
569 #if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
570 #define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n))))
571 #define ups(x,n) ((uint_32t) (x) << (8 * (n)))
572 #define bval(x,n) to_byte((x) >> (8 * (n)))
573 #define bytes2word(b0, b1, b2, b3) \
574  (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0))
575 #endif
576 
/* Big-endian internal order: byte n of a word occupies bits 24-8n..31-8n,
   so b0 is the most significant byte in bytes2word and "higher index"
   means a right shift. */
577 #if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
578 #define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n))))
579 #define ups(x,n) ((uint_32t) (x) >> (8 * (n)))
580 #define bval(x,n) to_byte((x) >> (24 - 8 * (n)))
581 #define bytes2word(b0, b1, b2, b3) \
582  (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3))
583 #endif
584 
/* word_in(x,c):    read 32-bit word number c from the byte array x
   word_out(x,c,v): write v as 32-bit word number c of the byte array x

   Three variants are provided:
     SAFE_IO defined              - byte-by-byte access, no alignment needed
     algorithm == platform order  - direct 32-bit load/store
     otherwise                    - direct 32-bit load/store with a byte swap

   The word index c is parenthesised in every variant so that an expression
   argument (for example i + 1) is scaled as a whole. x, c and v are
   evaluated more than once in the SAFE_IO variant, so they must not have
   side effects. The SAFE_IO word_out is a brace block, not an expression. */
585 #if defined( SAFE_IO )
586 
587 #define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*(c))[0], ((const uint_8t*)(x)+4*(c))[1], \
588  ((const uint_8t*)(x)+4*(c))[2], ((const uint_8t*)(x)+4*(c))[3])
589 #define word_out(x,c,v) { ((uint_8t*)(x)+4*(c))[0] = bval(v,0); ((uint_8t*)(x)+4*(c))[1] = bval(v,1); \
590  ((uint_8t*)(x)+4*(c))[2] = bval(v,2); ((uint_8t*)(x)+4*(c))[3] = bval(v,3); }
591 
592 #elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
593 
594 #define word_in(x,c) (*((uint_32t*)(x)+(c)))
595 #define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v))
596 
597 #else
598 
599 #define word_in(x,c) aes_sw32(*((uint_32t*)(x)+(c)))
600 #define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v))
601 
602 #endif
603 
604 /* the finite field modular polynomial and elements */
605 
/* Field polynomial constants: WPOLY is 0x011b and BPOLY is its low byte */
606 #define WPOLY 0x011b
607 #define BPOLY 0x1b
608 
609 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
610 
/* m1 selects the top bit of each byte, m2 the low seven bits. gf_mulx
   shifts the low seven bits of every byte left by one and, for each byte
   whose top bit was set, XORs BPOLY into that byte position. x is evaluated
   twice. */
611 #define m1 0x80808080
612 #define m2 0x7f7f7f7f
613 #define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
614 
615 /* The following defines provide alternative definitions of gf_mulx that might
616  give improved performance if a fast 32-bit multiply is not available. Note
617  that a temporary variable u needs to be defined where gf_mulx is used.
618 
619 #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
620 #define m4 (0x01010101 * BPOLY)
621 #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
622 */
623 
624 /* Work out which tables are needed for the different options */
625 
/* ASM_X86_V1C needs the full table set, so every table option is forced to
   FOUR_TABLES regardless of the user settings above. Each option is
   undefined first if necessary and then defined unconditionally; KEY_SCHED
   now follows the same pattern as the other four, so it is set even if it
   was not defined beforehand. */
626 #if defined( ASM_X86_V1C )
627 #if defined( ENC_ROUND )
628 #undef ENC_ROUND
629 #endif
630 #define ENC_ROUND FOUR_TABLES
631 #if defined( LAST_ENC_ROUND )
632 #undef LAST_ENC_ROUND
633 #endif
634 #define LAST_ENC_ROUND FOUR_TABLES
635 #if defined( DEC_ROUND )
636 #undef DEC_ROUND
637 #endif
638 #define DEC_ROUND FOUR_TABLES
639 #if defined( LAST_DEC_ROUND )
640 #undef LAST_DEC_ROUND
641 #endif
642 #define LAST_DEC_ROUND FOUR_TABLES
643 #if defined( KEY_SCHED )
644 #undef KEY_SCHED
645 #endif
646 #define KEY_SCHED FOUR_TABLES
647 #endif
648 
/* The *_SET symbols defined below record which tables the chosen options
   need (presumably consumed by the table definition/generation code -
   confirm there). */

/* Encryption rounds in C, or ASM_X86_V1C: FT1/FT4 for the normal round,
   FL1/FL4 for the last round, SBX when either round uses no tables */
649 #if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C )
650 #if ENC_ROUND == ONE_TABLE
651 #define FT1_SET
652 #elif ENC_ROUND == FOUR_TABLES
653 #define FT4_SET
654 #else
655 #define SBX_SET
656 #endif
657 #if LAST_ENC_ROUND == ONE_TABLE
658 #define FL1_SET
659 #elif LAST_ENC_ROUND == FOUR_TABLES
660 #define FL4_SET
661 #elif !defined( SBX_SET )
662 #define SBX_SET
663 #endif
664 #endif
665 
/* Decryption rounds in C, or ASM_X86_V1C: IT1/IT4 for the normal round,
   IL1/IL4 for the last round, ISB when either round uses no tables */
666 #if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C )
667 #if DEC_ROUND == ONE_TABLE
668 #define IT1_SET
669 #elif DEC_ROUND == FOUR_TABLES
670 #define IT4_SET
671 #else
672 #define ISB_SET
673 #endif
674 #if LAST_DEC_ROUND == ONE_TABLE
675 #define IL1_SET
676 #elif LAST_DEC_ROUND == FOUR_TABLES
677 #define IL4_SET
678 #elif !defined(ISB_SET)
679 #define ISB_SET
680 #endif
681 #endif
682 
/* Any key schedule in C: LS1/LS4 according to KEY_SCHED, else SBX */
683 #if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)
684 #if KEY_SCHED == ONE_TABLE
685 #define LS1_SET
686 #elif KEY_SCHED == FOUR_TABLES
687 #define LS4_SET
688 #elif !defined( SBX_SET )
689 #define SBX_SET
690 #endif
691 #endif
692 
/* Decryption key schedule in C additionally needs IM1/IM4, else SBX */
693 #if (FUNCS_IN_C & DEC_KEYING_IN_C)
694 #if KEY_SCHED == ONE_TABLE
695 #define IM1_SET
696 #elif KEY_SCHED == FOUR_TABLES
697 #define IM4_SET
698 #elif !defined( SBX_SET )
699 #define SBX_SET
700 #endif
701 #endif
702 
703 /* generic definitions of Rijndael macros that use tables */
704 
/* In the three macros below, vf(x,r,c) yields the word to take a byte from
   and rf(r,c) yields the index of that byte, for output row r (0..3) and
   column c.

   no_table:    look the four selected bytes up in 'box' and pack the four
                results into one word with bytes2word. */
705 #define no_table(x,box,vf,rf,c) bytes2word( \
706  box[bval(vf(x,0,c),rf(0,c))], \
707  box[bval(vf(x,1,c),rf(1,c))], \
708  box[bval(vf(x,2,c),rf(2,c))], \
709  box[bval(vf(x,3,c),rf(3,c))])
710 
/* one_table:   XOR four lookups in the single table 'tab'; the lookup for
                row r (r = 1..3) is passed through op(value, r) first. */
711 #define one_table(x,op,tab,vf,rf,c) \
712  ( tab[bval(vf(x,0,c),rf(0,c))] \
713  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
714  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
715  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
716 
/* four_tables: XOR four lookups, row r using its own table tab[r]. */
717 #define four_tables(x,tab,vf,rf,c) \
718  ( tab[0][bval(vf(x,0,c),rf(0,c))] \
719  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
720  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
721  ^ tab[3][bval(vf(x,3,c),rf(3,c))])
722 
/* Selector helpers for the table macros above:
     vf1(x,r,c) - always use the word x itself
     rf1(r,c)   - byte index equals the row number r
     rf2(r,c)   - byte index is (r - c) modulo 4; the added 8 keeps the
                  intermediate value non-negative for r and c in 0..3
   Arguments are parenthesised so that expression arguments keep their
   meaning. */
723 #define vf1(x,r,c) (x)
724 #define rf1(r,c) (r)
725 #define rf2(r,c) ((8+(r)-(c))&3)
726 
727 /* perform forward and inverse column mix operation on four bytes in long word x in */
728 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */
729 
/* fwd_mcol(x): forward column mix of the four bytes in x. The table forms
   depend on FM4_SET / FM1_SET and on t_use(), which is not defined in this
   file. The arithmetic fallback needs a temporary named g2 in scope;
   dec_fmvars expands to its declaration. */
730 #if defined( FM4_SET ) /* not currently used */
731 #define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0)
732 #elif defined( FM1_SET ) /* not currently used */
733 #define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0)
734 #else
735 #define dec_fmvars uint_32t g2
736 #define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
737 #endif
738 
/* inv_mcol(x): inverse column mix, from tables when IM4_SET or IM1_SET is
   defined. The arithmetic fallback needs temporaries g2, g4 and g9 in scope;
   dec_imvars expands to their declaration. Note that dec_fmvars and
   dec_imvars both declare g2, so they cannot both be used in one scope. */
739 #if defined( IM4_SET )
740 #define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0)
741 #elif defined( IM1_SET )
742 #define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0)
743 #else
744 #define dec_imvars uint_32t g2, g4, g9
745 #define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
746  (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
747 #endif
748 
/* ls_box(x,c): table lookup on the four bytes of x using the rf2 byte
   selector for column c. The table is chosen from whichever set is
   available, tried in this order: FL4, LS4, FL1, LS1, and finally the plain
   byte box via no_table. t_use() is not defined in this file. */
749 #if defined( FL4_SET )
750 #define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c)
751 #elif defined( LS4_SET )
752 #define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c)
753 #elif defined( FL1_SET )
754 #define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c)
755 #elif defined( LS1_SET )
756 #define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c)
757 #else
758 #define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c)
759 #endif
760 
/* With ASM_X86_V1C and decryption enabled, make sure ISB_SET is defined even
   though the table options above were forced to FOUR_TABLES */
761 #if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )
762 #define ISB_SET
763 #endif
764 
765 #endif