Ap4AesBlockCipher.cpp

00001 /*
00002 * AES Block cipher
00003 * (c) 2005 Gilles Boccon-Gibod
00004 * Portions (c) 2001, Dr Brian Gladman (see below)
00005 */
00006 
00007 /*
00008 -------------------------------------------------------------------------
00009 Copyright (c) 2001, Dr Brian Gladman <[email protected]>, Worcester, UK.
00010 All rights reserved.
00011 
00012 LICENSE TERMS
00013 
00014 The free distribution and use of this software in both source and binary 
00015 form is allowed (with or without changes) provided that:
00016 
00017 1. distributions of this source code include the above copyright 
00018 notice, this list of conditions and the following disclaimer;
00019 
00020 2. distributions in binary form include the above copyright
00021 notice, this list of conditions and the following disclaimer
00022 in the documentation and/or other associated materials;
00023 
00024 3. the copyright holder's name is not used to endorse products 
00025 built using this software without specific written permission. 
00026 
00027 DISCLAIMER
00028 
00029 This software is provided 'as is' with no explicit or implied warranties
00030 in respect of its properties, including, but not limited to, correctness 
00031 and fitness for purpose.
00032 -------------------------------------------------------------------------
00033 Issue Date: 29/07/2002
00034 */
00035 
00036 /*----------------------------------------------------------------------
00037 |       includes
00038 +---------------------------------------------------------------------*/
00039 #include "Ap4AesBlockCipher.h"
00040 #include "Ap4Results.h"
00041 
00042 /*----------------------------------------------------------------------
00043 |       build options
00044 +---------------------------------------------------------------------*/
00045 #define ENCRYPTION_KEY_SCHEDULE
00046 #define ENCRYPTION
00047 #define BLOCK_SIZE AP4_AES_BLOCK_SIZE
00048 
00049 /*----------------------------------------------------------------------
00050 |       options
00051 +---------------------------------------------------------------------*/
00052 /*  START OF CONFIGURATION OPTIONS
00053 
00054     USE OF DEFINES
00055   
00056     Later in this section there are a number of defines that control the 
00057     operation of the code.  In each section, the purpose of each define is 
00058     explained so that the relevant form can be included or excluded by 
00059     setting either 1's or 0's respectively on the branches of the related 
00060     #if clauses.
00061 */
00062 
00063 /*  1. BYTE ORDER IN 32-BIT WORDS
00064 
00065     To obtain the highest speed on processors with 32-bit words, this code 
00066     needs to determine the order in which bytes are packed into such words.
00067     The following block of code is an attempt to capture the most obvious 
00068     ways in which various environments define byte order. It may well fail, 
00069     in which case the definitions will need to be set by editing at the 
00070     points marked **** EDIT HERE IF NECESSARY **** below.
00071 */
00072 #define AES_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
00073 #define AES_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
00074 
00075 #if !defined(AP4_PLATFORM_BYTE_ORDER)
00076 #  error AP4_PLATFORM_BYTE_ORDER is not set
00077 #endif
00078 
00079 #if 0 /* platform-derived byte-order selection below is compiled out */
00080 #if AP4_PLATFORM_BYTE_ORDER == AP4_PLATFORM_BIG_ENDIAN
00081 #define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
00082 #elif AP4_PLATFORM_BYTE_ORDER == AP4_PLATFORM_LITTLE_ENDIAN
00083 #define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
00084 #else
00085 #error unsupported value for AP4_PLATFORM_BYTE_ORDER
00086 #endif
00087 #endif
00088 
00089 #define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN /* fixed to little-endian regardless of AP4_PLATFORM_BYTE_ORDER; NOTE(review): presumably safe only while SAFE_IO (byte-wise word_in/word_out) is enabled - confirm */
00090 
00091 
00092 /*  2. BYTE ORDER WITHIN 32 BIT WORDS
00093 
00094     The fundamental data processing units in Rijndael are 8-bit bytes. The 
00095     input, output and key input are all enumerated arrays of bytes in which 
00096     bytes are numbered starting at zero and increasing to one less than the 
00097     number of bytes in the array in question. This enumeration is only used 
00098     for naming bytes and does not imply any adjacency or order relationship 
00099     from one byte to another. When these inputs and outputs are considered 
00100     as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to 
00101     byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. 
00102     In this implementation bits are numbered from 0 to 7 starting at the 
00103     numerically least significant end of each byte (bit n represents 2^n).
00104 
00105     However, Rijndael can be implemented more efficiently using 32-bit 
00106     words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
00107     into word[n]. While in principle these bytes can be assembled into words 
00108     in any positions, this implementation only supports the two formats in 
00109     which bytes in adjacent positions within words also have adjacent byte
00110     numbers. This order is called big-endian if the lowest numbered bytes 
00111     in words have the highest numeric significance and little-endian if the 
00112     opposite applies. 
00113     
00114     This code can work in either order irrespective of the order used by the 
00115     machine on which it runs. Normally the internal byte order will be set
00116     to the order of the processor on which the code is to be run but this
00117     define can be used to reverse this in special situations
00118 */
00119 #if 1
00120 #define INTERNAL_BYTE_ORDER PLATFORM_BYTE_ORDER
00121 #elif defined(AES_LITTLE_ENDIAN)
00122 #define INTERNAL_BYTE_ORDER AES_LITTLE_ENDIAN
00123 #elif defined(AES_BIG_ENDIAN)
00124 #define INTERNAL_BYTE_ORDER AES_BIG_ENDIAN
00125 #endif
00126 
00127 /*  3. FAST INPUT/OUTPUT OPERATIONS.  
00128 
00129     On some machines it is possible to improve speed by transferring the 
00130     bytes in the input and output arrays to and from the internal 32-bit 
00131     variables by addressing these arrays as if they are arrays of 32-bit 
00132     words.  On some machines this will always be possible but there may 
00133     be a large performance penalty if the byte arrays are not aligned on 
00134     the normal word boundaries. On other machines this technique will 
00135     lead to memory access errors when such 32-bit word accesses are not
00136     properly aligned. The option SAFE_IO avoids such problems but will 
00137     often be slower on those machines that support misaligned access 
00138     (especially so if care is taken to align the input  and output byte 
00139     arrays on 32-bit word boundaries). If SAFE_IO is not defined it is 
00140     assumed that access to byte arrays as if they are arrays of 32-bit 
00141     words will not cause problems when such accesses are misaligned.
00142 */
00143 #if 1
00144 #define SAFE_IO
00145 #endif
00146 
00147 /*  4. LOOP UNROLLING
00148 
00149     The code for encryption and decryption cycles through a number of rounds
00150     that can be implemented either in a loop or by expanding the code into a 
00151     long sequence of instructions, the latter producing a larger program but
00152     one that will often be much faster. The latter is called loop unrolling.
00153     There are also potential speed advantages in expanding two iterations in
00154     a loop with half the number of iterations, which is called partial loop
00155     unrolling.  The following options allow partial or full loop unrolling 
00156     to be set independently for encryption and decryption
00157 */
00158 #if 1
00159 #define ENC_UNROLL  FULL
00160 #elif 0
00161 #define ENC_UNROLL  PARTIAL
00162 #else
00163 #define ENC_UNROLL  NONE
00164 #endif
00165 
00166 #if 1
00167 #define DEC_UNROLL  FULL
00168 #elif 0
00169 #define DEC_UNROLL  PARTIAL
00170 #else
00171 #define DEC_UNROLL  NONE
00172 #endif
00173 
00174 /*  5. FIXED OR DYNAMIC TABLES
00175 
00176     When this section is included the tables used by the code are compiled 
00177     statically into the binary file.  Otherwise they are computed once when 
00178     the code is first used.
00179 */
00180 #if 1
00181 #define FIXED_TABLES
00182 #endif
00183 
00184 /*  6. FAST FINITE FIELD OPERATIONS
00185 
00186     If this section is included, tables are used to provide faster finite 
00187     field arithmetic (this has no effect if FIXED_TABLES is defined).
00188 */
00189 #if 1
00190 #define FF_TABLES
00191 #endif
00192 
00193 /*  7. INTERNAL STATE VARIABLE FORMAT
00194 
00195     The internal state of Rijndael is stored in a number of local 32-bit 
00196     word variables which can be defined either as an array or as individual 
00197     named variables. Include this section if you want to store these local
00198     variables in arrays. Otherwise individual local variables will be used.
00199 */
00200 #if 1
00201 #define ARRAYS
00202 #endif
00203 
00204 /* In this implementation the columns of the state array are each held in
00205    32-bit words. The state array can be held in various ways: in an array
00206    of words, in a number of individual word variables or in a number of 
00207    processor registers. The following define maps a variable name x and
00208    a column number c to the way the state array variable is to be held.
00209    The first define below maps the state into an array x[c] whereas the 
00210    second form maps the state into a number of individual variables x0,
00211    x1, etc.  Another form could map individual state columns to machine
00212    register names.
00213 */
00214 
00215 #if defined(ARRAYS)
00216 #define s(x,c) x[c]
00217 #else
00218 #define s(x,c) x##c
00219 #endif
00220 
00221 /*  8. VARIABLE BLOCK SIZE SPEED
00222 
00223     This section is only relevant if you wish to use the variable block
00224     length feature of the code.  Include this section if you place more
00225     emphasis on speed rather than code size.
00226 */
00227 #if 1
00228 #define FAST_VARIABLE
00229 #endif
00230 
00231 /*  9. INTERNAL TABLE CONFIGURATION
00232 
00233     This cipher proceeds by repeating in a number of cycles known as 'rounds'
00234     which are implemented by a round function which can optionally be speeded
00235     up using tables.  The basic tables are each 256 32-bit words, with either 
00236     one or four tables being required for each round function depending on
00237     how much speed is required. The encryption and decryption round functions
00238     are different and the last encryption and decryption round functions are
00239     different again making four different round functions in all.
00240 
00241     This means that:
00242       1. Normal encryption and decryption rounds can each use either 0, 1 
00243          or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
00244       2. The last encryption and decryption rounds can also use either 0, 1 
00245          or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
00246 
00247     Include or exclude the appropriate definitions below to set the number
00248     of tables used by this implementation.
00249 */
00250 
00251 #if 1   /* set tables for the normal encryption round */
00252 #define ENC_ROUND   FOUR_TABLES
00253 #elif 0
00254 #define ENC_ROUND   ONE_TABLE
00255 #else
00256 #define ENC_ROUND   NO_TABLES
00257 #endif
00258 
00259 #if 1   /* set tables for the last encryption round */
00260 #define LAST_ENC_ROUND  FOUR_TABLES
00261 #elif 0
00262 #define LAST_ENC_ROUND  ONE_TABLE
00263 #else
00264 #define LAST_ENC_ROUND  NO_TABLES
00265 #endif
00266 
00267 #if 1   /* set tables for the normal decryption round */
00268 #define DEC_ROUND   FOUR_TABLES
00269 #elif 0
00270 #define DEC_ROUND   ONE_TABLE
00271 #else
00272 #define DEC_ROUND   NO_TABLES
00273 #endif
00274 
00275 #if 1   /* set tables for the last decryption round */
00276 #define LAST_DEC_ROUND  FOUR_TABLES
00277 #elif 0
00278 #define LAST_DEC_ROUND  ONE_TABLE
00279 #else
00280 #define LAST_DEC_ROUND  NO_TABLES
00281 #endif
00282 
00283 /*  The decryption key schedule can be speeded up with tables in the same
00284     way that the round functions can.  Include or exclude the following 
00285     defines to set this requirement.
00286 */
00287 #if 1
00288 #define KEY_SCHED   FOUR_TABLES
00289 #elif 0
00290 #define KEY_SCHED   ONE_TABLE
00291 #else
00292 #define KEY_SCHED   NO_TABLES
00293 #endif
00294 
00295 /* END OF CONFIGURATION OPTIONS */
00296 
00297 #define NO_TABLES   0   /* DO NOT CHANGE */
00298 #define ONE_TABLE   1   /* DO NOT CHANGE */
00299 #define FOUR_TABLES 4   /* DO NOT CHANGE */
00300 #define NONE        0   /* DO NOT CHANGE */
00301 #define PARTIAL     1   /* DO NOT CHANGE */
00302 #define FULL        2   /* DO NOT CHANGE */
00303 
00304 #if defined(BLOCK_SIZE) && ((BLOCK_SIZE & 3) || BLOCK_SIZE < 16 || BLOCK_SIZE > 32)
00305 #error An illegal block size has been specified.
00306 #endif  
00307 
/*  RC_LENGTH: 29 when no fixed BLOCK_SIZE is configured, otherwise a value
    computed from BLOCK_SIZE (10 for a 16-byte block, 29 for a 32-byte
    block).  The computed form is wrapped in parentheses so the macro
    expands as a single value inside a larger expression such as
    `2 * RC_LENGTH`.
*/
#if !defined(BLOCK_SIZE)
#define RC_LENGTH    29
#else
#define RC_LENGTH   (5 * BLOCK_SIZE / 4 - (BLOCK_SIZE == 16 ? 10 : 11))
#endif
00313 
00314 /* Disable at least some poor combinations of options */
00315 
00316 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
00317 #undef  LAST_ENC_ROUND
00318 #define LAST_ENC_ROUND  NO_TABLES
00319 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
00320 #undef  LAST_ENC_ROUND
00321 #define LAST_ENC_ROUND  ONE_TABLE 
00322 #endif
00323 
00324 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
00325 #undef  ENC_UNROLL
00326 #define ENC_UNROLL  NONE
00327 #endif
00328 
00329 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
00330 #undef  LAST_DEC_ROUND
00331 #define LAST_DEC_ROUND  NO_TABLES
00332 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
00333 #undef  LAST_DEC_ROUND
00334 #define LAST_DEC_ROUND  ONE_TABLE 
00335 #endif
00336 
00337 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
00338 #undef  DEC_UNROLL
00339 #define DEC_UNROLL  NONE
00340 #endif
00341 
00342 /*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
00343                higher index positions with wrap around into low positions
00344     ups(x,n):  moves bytes by n positions to higher index positions in 
00345                words but without wrap around
00346     bval(x,n): extracts a byte from a word
00347 
00348     NOTE:      The definitions given here are intended only for use with 
00349                unsigned variables and with shift counts that are compile
00350                time constants
00351 */
00352 
00353 #if (INTERNAL_BYTE_ORDER == AES_LITTLE_ENDIAN)
00354 #if defined(_MSC_VER)
00355 #define upr(x,n)        _lrotl((aes_32t)(x), 8 * (n))
00356 #else
00357 #define upr(x,n)        ((aes_32t)(x) << 8 * (n) | (aes_32t)(x) >> (32 - 8 * (n)))
00358 #endif
00359 #define ups(x,n)        ((aes_32t)(x) << 8 * (n))
00360 #define bval(x,n)       ((aes_08t)((x) >> 8 * (n)))
00361 #define bytes2word(b0, b1, b2, b3)  \
00362         (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0))
00363 #endif
00364 
/*  Big-endian internal word layout: byte 0 is the most significant byte of
    the word, so moving a byte to a higher index position is a right shift.

    upr(x,n):   rotate bytes by n positions towards higher indexes (wraps)
    ups(x,n):   shift bytes by n positions towards higher indexes (no wrap)
    bval(x,n):  extract byte n of word x
    bytes2word: assemble a word from bytes b0 (index 0) .. b3 (index 3)

    As with the little-endian variants these are meant for unsigned operands
    and compile-time constant shift counts.  ups previously carried a stray
    closing parenthesis that made every use a syntax error.
*/
#if (INTERNAL_BYTE_ORDER == AES_BIG_ENDIAN)
#define upr(x,n)        ((aes_32t)(x) >> 8 * (n) | (aes_32t)(x) << (32 - 8 * (n)))
#define ups(x,n)        ((aes_32t)(x) >> 8 * (n))
#define bval(x,n)       ((aes_08t)((x) >> (24 - 8 * (n))))
#define bytes2word(b0, b1, b2, b3)  \
        (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3))
#endif
00372 
00373 #if defined(SAFE_IO)
00374 
00375 #define word_in(x)      bytes2word((x)[0], (x)[1], (x)[2], (x)[3])
00376 #define word_out(x,v)   { (x)[0] = bval(v,0); (x)[1] = bval(v,1);   \
00377                           (x)[2] = bval(v,2); (x)[3] = bval(v,3);   }
00378 
00379 #elif (INTERNAL_BYTE_ORDER == PLATFORM_BYTE_ORDER)
00380 
00381 #define word_in(x)      *(aes_32t*)(x)
00382 #define word_out(x,v)   *(aes_32t*)(x) = (v)
00383 
00384 #else
00385 
/*  Fallback 32-bit byte swap, used only when bswap_32 is not already
    provided.  Builds without _MSC_VER also get a rotate-left macro named
    after the MSVC _lrotl routine that bswap_32 relies on.  The rotate
    count is parenthesized so that a compound count such as `4 + 4` is
    applied as a whole; the count must be in the range 1..31.
*/
#if !defined(bswap_32)
#if !defined(_MSC_VER)
#define _lrotl(x,n)     ((((aes_32t)(x)) << (n)) | (((aes_32t)(x)) >> (32 - (n))))
#endif
#define bswap_32(x)     ((_lrotl((x),8) & 0x00ff00ff) | (_lrotl((x),24) & 0xff00ff00))
#endif
00392 
00393 #define word_in(x)      bswap_32(*(aes_32t*)(x))
00394 #define word_out(x,v)   *(aes_32t*)(x) = bswap_32(v)
00395 
00396 #endif
00397 
00398 /* the finite field modular polynomial and elements */
00399 
00400 #define WPOLY   0x011b
00401 #define BPOLY     0x1b
00402 
00403 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
00404 
00405 #define m1  0x80808080
00406 #define m2  0x7f7f7f7f
00407 #define FFmulX(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
00408 
00409 /* The following defines provide alternative definitions of FFmulX that might
00410    give improved performance if a fast 32-bit multiply is not available. Note
00411    that a temporary variable u needs to be defined where FFmulX is used.
00412 
00413 #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) 
00414 #define m4  (0x01010101 * BPOLY)
00415 #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) 
00416 */
00417 
00418 /* Work out which tables are needed for the different options   */
00419 
00420 #ifdef  AES_ASM
00421 #ifdef  ENC_ROUND
00422 #undef  ENC_ROUND
00423 #endif
00424 #define ENC_ROUND   FOUR_TABLES
00425 #ifdef  LAST_ENC_ROUND
00426 #undef  LAST_ENC_ROUND
00427 #endif
00428 #define LAST_ENC_ROUND  FOUR_TABLES
00429 #ifdef  DEC_ROUND
00430 #undef  DEC_ROUND
00431 #endif
00432 #define DEC_ROUND   FOUR_TABLES
00433 #ifdef  LAST_DEC_ROUND
00434 #undef  LAST_DEC_ROUND
00435 #endif
00436 #define LAST_DEC_ROUND  FOUR_TABLES
00437 #ifdef  KEY_SCHED
00438 #undef  KEY_SCHED
00439 #define KEY_SCHED   FOUR_TABLES
00440 #endif
00441 #endif
00442 
00443 #if defined(ENCRYPTION) || defined(AES_ASM)
00444 #if ENC_ROUND == ONE_TABLE
00445 #define FT1_SET
00446 #elif ENC_ROUND == FOUR_TABLES
00447 #define FT4_SET
00448 #else
00449 #define SBX_SET
00450 #endif
00451 #if LAST_ENC_ROUND == ONE_TABLE
00452 #define FL1_SET
00453 #elif LAST_ENC_ROUND == FOUR_TABLES
00454 #define FL4_SET
00455 #elif !defined(SBX_SET)
00456 #define SBX_SET
00457 #endif
00458 #endif
00459 
00460 #if defined(DECRYPTION) || defined(AES_ASM)
00461 #if DEC_ROUND == ONE_TABLE
00462 #define IT1_SET
00463 #elif DEC_ROUND == FOUR_TABLES
00464 #define IT4_SET
00465 #else
00466 #define ISB_SET
00467 #endif
00468 #if LAST_DEC_ROUND == ONE_TABLE
00469 #define IL1_SET
00470 #elif LAST_DEC_ROUND == FOUR_TABLES
00471 #define IL4_SET
00472 #elif !defined(ISB_SET)
00473 #define ISB_SET
00474 #endif
00475 #endif
00476 
00477 #if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE)
00478 #if KEY_SCHED == ONE_TABLE
00479 #define LS1_SET
00480 #define IM1_SET
00481 #elif KEY_SCHED == FOUR_TABLES
00482 #define LS4_SET
00483 #define IM4_SET
00484 #elif !defined(SBX_SET)
00485 #define SBX_SET
00486 #endif
00487 #endif
00488 
00489 #ifdef  FIXED_TABLES
00490 #define prefx   static const
00491 #else
00492 #define prefx   extern
00493 extern aes_08t  tab_init;
00494 void gen_tabs(void);
00495 #endif
00496 
00497 //prefx aes_32t  rcon_tab[29];
00498 //
00499 //#ifdef  SBX_SET
00500 //prefx aes_08t s_box[256];
00501 //#endif
00502 //
00503 //#ifdef  ISB_SET
00504 //prefx aes_08t inv_s_box[256];
00505 //#endif
00506 //
00507 //#ifdef  FT1_SET
00508 //prefx aes_32t ft_tab[256];
00509 //#endif
00510 //
00511 //#ifdef  FT4_SET
00512 //prefx aes_32t ft_tab[4][256];
00513 //#endif
00514 //
00515 //#ifdef  FL1_SET
00516 //prefx aes_32t fl_tab[256];
00517 //#endif
00518 //
00519 //#ifdef  FL4_SET
00520 //prefx aes_32t fl_tab[4][256];
00521 //#endif
00522 //
00523 //#ifdef  IT1_SET
00524 //prefx aes_32t it_tab[256];
00525 //#endif
00526 //
00527 //#ifdef  IT4_SET
00528 //prefx aes_32t it_tab[4][256];
00529 //#endif
00530 //
00531 //#ifdef  IL1_SET
00532 //prefx aes_32t il_tab[256];
00533 //#endif
00534 //
00535 //#ifdef  IL4_SET
00536 //prefx aes_32t il_tab[4][256];
00537 //#endif
00538 //
00539 //#ifdef  LS1_SET
00540 //#ifdef  FL1_SET
00541 //#undef  LS1_SET
00542 //#else
00543 //prefx aes_32t ls_tab[256];
00544 //#endif
00545 //#endif
00546 //
00547 //#ifdef  LS4_SET
00548 //#ifdef  FL4_SET
00549 //#undef  LS4_SET
00550 //#else
00551 //prefx aes_32t ls_tab[4][256];
00552 //#endif
00553 //#endif
00554 //
00555 //#ifdef  IM1_SET
00556 //prefx aes_32t im_tab[256];
00557 //#endif
00558 //
00559 //#ifdef  IM4_SET
00560 //prefx aes_32t im_tab[4][256];
00561 //#endif
00562 
00563 /* Set the number of columns in nc.  Note that it is important
00564    that nc is a constant which is known at compile time if the
00565    highest speed version of the code is needed.
00566 */
00567 
00568 #if defined(BLOCK_SIZE)
00569 #define nc  (BLOCK_SIZE >> 2)
00570 #else
00571 #define nc  (cx->n_blk >> 2)
00572 #endif
00573 
00574 /* generic definitions of Rijndael macros that use tables    */
00575 
00576 #define no_table(x,box,vf,rf,c) bytes2word( \
00577     box[bval(vf(x,0,c),rf(0,c))], \
00578     box[bval(vf(x,1,c),rf(1,c))], \
00579     box[bval(vf(x,2,c),rf(2,c))], \
00580     box[bval(vf(x,3,c),rf(3,c))])
00581 
00582 #define one_table(x,op,tab,vf,rf,c) \
00583  (     tab[bval(vf(x,0,c),rf(0,c))] \
00584   ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
00585   ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
00586   ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
00587 
00588 #define four_tables(x,tab,vf,rf,c) \
00589  (  tab[0][bval(vf(x,0,c),rf(0,c))] \
00590   ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
00591   ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
00592   ^ tab[3][bval(vf(x,3,c),rf(3,c))])
00593 
/*  Selector helpers passed as the vf/rf arguments of the no_table,
    one_table and four_tables macros:
      vf1(x,r,c)  yields x unchanged (r and c are ignored)
      rf1(r,c)    yields r (c is ignored)
      rf2(r,c)    yields (r - c) reduced to the range 0..3
    rf2 parenthesizes its arguments so expression arguments expand correctly.
*/
#define vf1(x,r,c)  (x)
#define rf1(r,c)    (r)
#define rf2(r,c)    (((r)-(c))&3)
00597 
00598 /* perform forward and inverse column mix operation on four bytes in long word x in */
00599 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
00600 
00601 #define dec_fmvars
00602 #if defined(FM4_SET)    /* not currently used */
00603 #define fwd_mcol(x)     four_tables(x,fm_tab,vf1,rf1,0)
00604 #elif defined(FM1_SET)  /* not currently used */
00605 #define fwd_mcol(x)     one_table(x,upr,fm_tab,vf1,rf1,0)
00606 #else
00607 #undef  dec_fmvars
00608 #define dec_fmvars      aes_32t f1, f2;
00609 #define fwd_mcol(x)     (f1 = (x), f2 = FFmulX(f1), f2 ^ upr(f1 ^ f2, 3) ^ upr(f1, 2) ^ upr(f1, 1))
00610 #endif
00611 
00612 #define dec_imvars
00613 #if defined(IM4_SET)
00614 #define inv_mcol(x)     four_tables(x,im_tab,vf1,rf1,0)
00615 #elif defined(IM1_SET)
00616 #define inv_mcol(x)     one_table(x,upr,im_tab,vf1,rf1,0)
00617 #else
00618 #undef  dec_imvars
00619 #define dec_imvars      aes_32t    f2, f4, f8, f9;
00620 #define inv_mcol(x) \
00621     (f9 = (x), f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
00622     f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
00623 #endif
00624 
00625 #if defined(FL4_SET)
00626 #define ls_box(x,c)     four_tables(x,fl_tab,vf1,rf2,c)
00627 #elif   defined(LS4_SET)
00628 #define ls_box(x,c)     four_tables(x,ls_tab,vf1,rf2,c)
00629 #elif defined(FL1_SET)
00630 #define ls_box(x,c)     one_table(x,upr,fl_tab,vf1,rf2,c)
00631 #elif defined(LS1_SET)
00632 #define ls_box(x,c)     one_table(x,upr,ls_tab,vf1,rf2,c)
00633 #else
00634 #define ls_box(x,c)     no_table(x,s_box,vf1,rf2,c)
00635 #endif
00636 
00637 /*----------------------------------------------------------------------
00638 |       tables
00639 +---------------------------------------------------------------------*/
#if defined(FIXED_TABLES) || !defined(FF_TABLES)

/*  Finite field arithmetic operations on a single byte value.

    f2, f4 and f8 shift x left by 1, 2 and 3 bits and XOR in WPOLY once for
    each bit shifted out above bit 7, i.e. they multiply x by {02}, {04} and
    {08} modulo the field polynomial.  The remaining macros combine these by
    XOR to multiply by {03}, {09}, {0b}, {0d} and {0e}.

    x is expected to be a byte value (0..255) and is evaluated more than
    once, so it must be free of side effects.  Every use of x is
    parenthesized so that expression arguments expand correctly.
*/

#define f2(x)   (((x)<<1) ^ ((((x)>>7) & 1) * WPOLY))
#define f4(x)   (((x)<<2) ^ ((((x)>>6) & 1) * WPOLY) ^ ((((x)>>6) & 2) * WPOLY))
#define f8(x)   (((x)<<3) ^ ((((x)>>5) & 1) * WPOLY) ^ ((((x)>>5) & 2) * WPOLY) \
                          ^ ((((x)>>5) & 4) * WPOLY))
#define f3(x)   (f2(x) ^ (x))
#define f9(x)   (f8(x) ^ (x))
#define fb(x)   (f8(x) ^ f2(x) ^ (x))
#define fd(x)   (f8(x) ^ f4(x) ^ (x))
#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))

#endif
00655 
00656 #if defined(FIXED_TABLES)
00657 
00658 #define sb_data(w) \
00659     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
00660     w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
00661     w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
00662     w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
00663     w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
00664     w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
00665     w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
00666     w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
00667     w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
00668     w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
00669     w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
00670     w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
00671     w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
00672     w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
00673     w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
00674     w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
00675     w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
00676     w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
00677     w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
00678     w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
00679     w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
00680     w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
00681     w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
00682     w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
00683     w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
00684     w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
00685     w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
00686     w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
00687     w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
00688     w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
00689     w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
00690     w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16)
00691 
00692 #define isb_data(w) \
00693     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
00694     w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
00695     w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
00696     w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
00697     w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
00698     w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
00699     w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
00700     w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
00701     w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
00702     w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
00703     w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
00704     w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
00705     w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
00706     w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
00707     w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
00708     w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
00709     w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
00710     w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
00711     w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
00712     w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
00713     w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
00714     w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
00715     w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
00716     w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
00717     w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
00718     w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
00719     w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
00720     w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
00721     w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
00722     w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
00723     w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
00724     w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d),
00725 
00726 #define mm_data(w) \
00727     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
00728     w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
00729     w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
00730     w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
00731     w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
00732     w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
00733     w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
00734     w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
00735     w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
00736     w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
00737     w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
00738     w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
00739     w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
00740     w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
00741     w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
00742     w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
00743     w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
00744     w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
00745     w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
00746     w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
00747     w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
00748     w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
00749     w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
00750     w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
00751     w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
00752     w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
00753     w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
00754     w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
00755     w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
00756     w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
00757     w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
00758     w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff)
00759 
      /*  h0 is the identity wrapper: handing it to one of the table data
          macros (as the s_box and inv_s_box initialisers further down do)
          emits every table byte unchanged.
      */
00760 #define h0(x)   (x)
00761 
00762 /*  These defines are used to ensure tables are generated in the 
00763     right format depending on the internal byte order required
00764 */
      /*  wN(p) builds a 32-bit word with bytes2word(), passing p as argument
          number N (counting from 0) and zero for the other three arguments.
          Where each argument lands inside the word is decided by bytes2word(),
          which is defined outside this section.
      */
00765 
00766 #define w0(p)   bytes2word(p, 0, 0, 0)
00767 #define w1(p)   bytes2word(0, p, 0, 0)
00768 #define w2(p)   bytes2word(0, 0, p, 0)
00769 #define w3(p)   bytes2word(0, 0, 0, p)
00770 
00771 /*  Number of elements required in this table for different
00772     block and key lengths is:
00773 
00774     Rcon Table      key length (bytes)
00775     Length          16  20  24  28  32
00776                 ---------------------
00777     block     16 |  10   9   8   7   7
00778     length    20 |  14  11  10   9   9
00779     (bytes)   24 |  19  15  12  11  11
00780               28 |  24  19  16  13  13
00781               32 |  29  23  19  17  14
00782 
00783     this table can be a table of bytes if the key schedule
00784     code is adjusted accordingly
00785 */
00786 
      /*  Element generators for the round tables below.
      
          u0(p) packs f2(p), p, p, f3(p) into one word; u1..u3 are the same
          four values with the argument list rotated right by one, two and
          three places.  They initialise ft_tab.
      
          v0(p) packs fe(p), f9(p), fd(p), fb(p); v1..v3 rotate that list in
          the same way.  They initialise it_tab and im_tab.
      
          The f2/f3/f9/fb/fd/fe helpers used on this (fixed table) path are
          defined outside this section - presumably the GF(2^8) multiplies by
          0x02, 0x03, 0x09, 0x0b, 0x0d and 0x0e; confirm against their
          definitions.
      */
00787 #define u0(p)   bytes2word(f2(p), p, p, f3(p))
00788 #define u1(p)   bytes2word(f3(p), f2(p), p, p)
00789 #define u2(p)   bytes2word(p, f3(p), f2(p), p)
00790 #define u3(p)   bytes2word(p, p, f3(p), f2(p))
00791 
00792 #define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
00793 #define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
00794 #define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
00795 #define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
00796 
      /*  Round constants consumed by the key schedule macros (ke4, ke6, ke8
          and their decryption counterparts), which index this table by cycle
          number.  Each entry is a word built with w0() from a single byte.
          29 entries is the largest count in the table of lengths given above
          (32 byte block with a 16 byte key).  The byte values are the same
          sequence gen_tabs() produces on the dynamic path by starting at 1
          and applying f2() repeatedly.
      */
00797 static const aes_32t rcon_tab[29] =
00798 {
00799     w0(0x01), w0(0x02), w0(0x04), w0(0x08),
00800     w0(0x10), w0(0x20), w0(0x40), w0(0x80),
00801     w0(0x1b), w0(0x36), w0(0x6c), w0(0xd8),
00802     w0(0xab), w0(0x4d), w0(0x9a), w0(0x2f),
00803     w0(0x5e), w0(0xbc), w0(0x63), w0(0xc6),
00804     w0(0x97), w0(0x35), w0(0x6a), w0(0xd4),
00805     w0(0xb3), w0(0x7d), w0(0xfa), w0(0xef),
00806     w0(0xc5)
00807 };
00808 
      /*  Compile-time lookup tables.  Every table is emitted only when its
          *_SET macro is defined; the "1" form holds 256 words and the "4"
          form holds four 256-word tables built with the rotated element
          generators (u0..u3, v0..v3 or w0..w3).
      
          sb_data() and isb_data() supply the table contents and are defined
          outside this section; going by the table names they carry the
          forward and inverse S-box bytes respectively.  Each is invoked with
          a per-element macro: h0 for raw bytes, u*/v* for round table words,
          w* for a byte placed in a single position of the word.
      
          s_box / inv_s_box : raw byte tables
          ft_tab            : normal encryption round
          fl_tab            : last encryption round
          it_tab            : normal decryption round
          il_tab            : last decryption round
          ls_tab            : key schedule (same initialiser as fl_tab)
          im_tab            : inverse mix column operation
      */
00809 #ifdef  SBX_SET
00810 static const aes_08t s_box[256] = { sb_data(h0) };
00811 #endif
00812 #ifdef  ISB_SET
00813 static const aes_08t inv_s_box[256] = { isb_data(h0) };
00814 #endif
00815 
00816 #ifdef  FT1_SET
00817 static const aes_32t ft_tab[256] = { sb_data(u0) };
00818 #endif
00819 #ifdef  FT4_SET
00820 static const aes_32t ft_tab[4][256] = 
00821     { {  sb_data(u0) }, {  sb_data(u1) }, {  sb_data(u2) }, {  sb_data(u3) } };
00822 #endif
00823 
00824 #ifdef  FL1_SET
00825 static const aes_32t fl_tab[256] = { sb_data(w0) };
00826 #endif
00827 #ifdef  FL4_SET
00828 static const aes_32t fl_tab[4][256] = 
00829     { {  sb_data(w0) }, {  sb_data(w1) }, {  sb_data(w2) }, {  sb_data(w3) } };
00830 #endif
00831 
00832 #ifdef  IT1_SET
00833 static const aes_32t it_tab[256] = { isb_data(v0) };
00834 #endif
00835 #ifdef  IT4_SET
00836 static const aes_32t it_tab[4][256] =
00837     { { isb_data(v0) }, { isb_data(v1) }, { isb_data(v2) }, { isb_data(v3) } };
00838 #endif
00839 
00840 #ifdef  IL1_SET
00841 static const aes_32t il_tab[256] = { isb_data(w0) };
00842 #endif
00843 #ifdef  IL4_SET
00844 static const aes_32t il_tab[4][256] = 
00845     { { isb_data(w0) }, { isb_data(w1) }, { isb_data(w2) }, { isb_data(w3) } };
00846 #endif
00847 
00848 #ifdef  LS1_SET
00849 static const aes_32t ls_tab[256] = { sb_data(w0) };
00850 #endif
00851 #ifdef  LS4_SET
00852 static const aes_32t ls_tab[4][256] =
00853     { {  sb_data(w0) }, {  sb_data(w1) }, {  sb_data(w2) }, {  sb_data(w3) } };
00854 #endif
00855 
      /*  im_tab takes its contents from mm_data() rather than from an S-box
          data macro.  NOTE(review): mm_data is presumably the macro ending
          just above h0, which lists w(0x00) .. w(0xff) in order - confirm.
      */
00856 #ifdef  IM1_SET
00857 static const aes_32t im_tab[256] = { mm_data(v0) };
00858 #endif
00859 #ifdef  IM4_SET
00860 static const aes_32t im_tab[4][256] = 
00861     { {  mm_data(v0) }, {  mm_data(v1) }, {  mm_data(v2) }, {  mm_data(v3) } };
00862 #endif
00863 
00864 #else   /* dynamic table generation */
00865 
      /*  Storage for the run-time generated tables.  tab_init is 0 until
          gen_tabs() has filled them; the key set-up routines test it and call
          gen_tabs() on first use.
      */
00866 aes_08t tab_init = 0;
00867 
      /*  From here on 'const' expands to nothing, so later declarations that
          spell 'const' (for example the parameters of hibit() and fi()) lose
          the qualifier on this path.
          NOTE(review): defining a macro named after a keyword is not
          permitted once standard headers are involved - confirm this path is
          only built where that is acceptable.
      */
00868 #define const
00869 
      /*  RC_LENGTH is defined outside this section. */
00870 static aes_32t  rcon_tab[RC_LENGTH];
00871 
      /*  The tables below mirror the fixed-table set one for one but are
          writable and declared without 'static'.
          NOTE(review): that gives them external linkage, unlike the fixed
          versions - confirm whether this is intended.
      */
00872 #ifdef  SBX_SET
00873 aes_08t s_box[256];
00874 #endif
00875 #ifdef  ISB_SET
00876 aes_08t inv_s_box[256];
00877 #endif
00878 
00879 #ifdef  FT1_SET
00880 aes_32t ft_tab[256];
00881 #endif
00882 #ifdef  FT4_SET
00883 aes_32t ft_tab[4][256];
00884 #endif
00885 
00886 #ifdef  FL1_SET
00887 aes_32t fl_tab[256];
00888 #endif
00889 #ifdef  FL4_SET
00890 aes_32t fl_tab[4][256];
00891 #endif
00892 
00893 #ifdef  IT1_SET
00894 aes_32t it_tab[256];
00895 #endif
00896 #ifdef  IT4_SET
00897 aes_32t it_tab[4][256];
00898 #endif
00899 
00900 #ifdef  IL1_SET
00901 aes_32t il_tab[256];
00902 #endif
00903 #ifdef  IL4_SET
00904 aes_32t il_tab[4][256];
00905 #endif
00906 
00907 #ifdef  LS1_SET
00908 aes_32t ls_tab[256];
00909 #endif
00910 #ifdef  LS4_SET
00911 aes_32t ls_tab[4][256];
00912 #endif
00913 
00914 #ifdef  IM1_SET
00915 aes_32t im_tab[256];
00916 #endif
00917 #ifdef  IM4_SET
00918 aes_32t im_tab[4][256];
00919 #endif
00920 
00921 #if !defined(FF_TABLES)
00922 
00923 /*  Generate the tables for the dynamic table option
00924 
00925     It will generally be sensible to use tables to compute finite 
00926     field multiplies and inverses but where memory is scarce this 
00927     code might sometimes be better. But it only has effect during
00928     initialisation so it's pretty unimportant in overall terms.
00929 */
00930 
00931 /*  hibit(x): for 1 < x < 0x00000200 yield 2 ^ (n - 1), n being the
00932     number of the most significant bit set in x; for x below 2 the
00933     result is 0.  Halving the bit value lets fi() keep byte sized locals.
00934 */
00935 
00936 static aes_08t hibit(const aes_32t x)
00937 {
00938     aes_08t mask = (aes_08t)((x >> 1) | (x >> 2));  /* halve x, start smearing the top bit */
00939     mask = (aes_08t)(mask | (mask >> 2));
00940     mask = (aes_08t)(mask | (mask >> 4));           /* every bit below the top one is now set */
00941     return (aes_08t)((mask + 1) >> 1);              /* round up to a power of two, then halve */
00942 }
00943 
00944 /* return the inverse of the finite field element x */
      /*  Used on the dynamic-table path when FF_TABLES is not defined.
          0 and 1 are returned unchanged.  For other x the routine alternately
          reduces two polynomials p1 (starting at x) and p2 (starting at
          BPOLY) against each other while applying the same steps to v1 and
          v2, in the manner of an extended Euclidean reduction; when one
          remainder drops below 2 the matching v value is returned.
      
          n1 and n2 hold hibit() of p1 and p2, i.e. half the value of their
          top set bit, which is why n2 starts at 0x80 rather than 0x100.
          BPOLY is defined outside this section - presumably the low eight
          bits of the field polynomial; confirm.
      
          All locals are bytes: products such as p1 * n2 are truncated to
          eight bits when stored, and the algorithm depends on that.
      */
00945 
00946 static aes_08t fi(const aes_08t x)
00947 {   aes_08t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
00948 
00949     if(x < 2) return x;
00950 
00951     for(;;)
00952     {
00953         if(!n1) return v1;          /* hibit(p1) == 0 means p1 < 2 */
00954 
00955         while(n2 >= n1)
00956         {   
                  /* n2 / n1 is the power of two that lines p1 up under p2 */
00957             n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
00958         }
00959         
00960         if(!n2) return v2;          /* hibit(p2) == 0 means p2 < 2 */
00961 
00962         while(n1 >= n2)
00963         {   
                  /* same step with the roles of the two polynomials swapped */
00964             n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
00965         }
00966     }
00967 }
00968 
00969 #else
00970 
00971 /* define the finite field multiplies required for Rijndael */
      /*  Table-driven versions, selected when FF_TABLES is defined.  They
          index the pow[] and log[] arrays that are local to gen_tabs(), so
          they can only be expanded inside that function.
      
          A non-zero x is multiplied by adding a constant to log[x] and
          looking the sum up in pow[]; zero maps to zero.  pow[] is filled
          twice over (pow[i] and pow[i + 255]), so the sums need no
          reduction modulo 255.  The constant 0x01 is log[0x03], since
          gen_tabs() generates the tables from the root 0x03; the other
          constants are presumably the logs of 0x02, 0x09, 0x0b, 0x0d and
          0x0e - confirm.
      
          fi(x) gives the inverse of x as pow[255 - log[x]], and 0 for 0.
      */
00972 
00973 #define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
00974 #define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
00975 #define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
00976 #define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
00977 #define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
00978 #define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
00979 #define fi(x) ((x) ?   pow[255 - log[x]]: 0)
00980 
00981 #endif
00982 
00983 /* The forward and inverse affine transformations used in the S-box */
      /*  Both macros are comma expressions that assign to a variable named w,
          which must be a 32-bit word in scope at the point of use (gen_tabs()
          declares it).  Any previous value of w is overwritten.
      
          x is widened into w, XORed with left-shifted copies of itself, and
          the bits that moved above bit 7 are folded back with w ^ (w >> 8)
          before truncating to a byte and XORing the constant (0x63 forward,
          0x05 inverse).
      
          x is not parenthesised in the expansion, so the argument should be
          a simple or already parenthesised expression.
      */
00984 
00985 #define fwd_affine(x) \
00986     (w = (aes_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(aes_08t)(w^(w>>8)))
00987 
00988 #define inv_affine(x) \
00989     (w = (aes_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(aes_08t)(w^(w>>8)))
00990 
      /*  gen_tabs: fill every run-time table that is enabled by a *_SET macro
          and then set tab_init to 1.  Takes no arguments, returns nothing.
      
          Steps, in order:
            1. with FF_TABLES, build local pow[]/log[] tables for the field;
            2. fill rcon_tab[0 .. RC_LENGTH-1] by repeated f2() from 1;
            3. for each byte value i, derive the forward S-box byte and the
               encryption / key schedule table entries, then the inverse
               S-box byte and the decryption table entries.
      
          w is scratch: it is overwritten by fwd_affine()/inv_affine() and then
          reused to hold the word being stored, so statement order matters.
          bytes2word() and upr() are defined outside this section.
      */
00991 void gen_tabs(void)
00992 {   aes_32t  i, w;
00993 
00994 #if defined(FF_TABLES)
00995 
00996     aes_08t  pow[512], log[256];
00997 
00998     /*  log and power tables for GF(2^8) finite field with
00999         WPOLY as modular polynomial - the simplest primitive
01000         root is 0x03, used here to generate the tables
01001     */
01002 
01003     i = 0; w = 1; 
01004     do
01005     {   
01006         pow[i] = (aes_08t)w;
01007         pow[i + 255] = (aes_08t)w;      /* second copy: lets f2() etc. skip a mod 255 */
01008         log[w] = (aes_08t)i++;
01009         w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);   /* w = w * 0x03, reduced by WPOLY */
01010     }
01011     while (w != 1);
01012 
01013 #endif
01014 
          /* round constants: successive f2() multiples starting from 1 */
01015     for(i = 0, w = 1; i < RC_LENGTH; ++i)
01016     {
01017         rcon_tab[i] = bytes2word(w, 0, 0, 0);
01018         w = f2(w);
01019     }
01020 
01021     for(i = 0; i < 256; ++i)
01022     {   aes_08t    b;
01023 
              /* forward S-box byte: field inverse followed by the affine map */
01024         b = fwd_affine(fi((aes_08t)i));
01025         w = bytes2word(f2(b), b, b, f3(b));
01026 
01027 #ifdef  SBX_SET
01028         s_box[i] = b;
01029 #endif
01030 
01031 #ifdef  FT1_SET                 /* tables for a normal encryption round */
01032         ft_tab[i] = w;
01033 #endif
01034 #ifdef  FT4_SET
01035         ft_tab[0][i] = w;
01036         ft_tab[1][i] = upr(w,1);
01037         ft_tab[2][i] = upr(w,2);
01038         ft_tab[3][i] = upr(w,3);
01039 #endif
01040         w = bytes2word(b, 0, 0, 0);
01041 
01042 #ifdef  FL1_SET                 /* tables for last encryption round (may also   */
01043         fl_tab[i] = w;          /* be used in the key schedule)                 */
01044 #endif
01045 #ifdef  FL4_SET
01046         fl_tab[0][i] = w;
01047         fl_tab[1][i] = upr(w,1);
01048         fl_tab[2][i] = upr(w,2);
01049         fl_tab[3][i] = upr(w,3);
01050 #endif
01051 
01052 #ifdef  LS1_SET                 /* table for key schedule if fl_tab above is    */
01053         ls_tab[i] = w;          /* not of the required form                     */
01054 #endif
01055 #ifdef  LS4_SET
01056         ls_tab[0][i] = w;
01057         ls_tab[1][i] = upr(w,1);
01058         ls_tab[2][i] = upr(w,2);
01059         ls_tab[3][i] = upr(w,3);
01060 #endif
01061 
              /* inverse S-box byte: inverse affine map, then the field inverse */
01062         b = fi(inv_affine((aes_08t)i));
01063         w = bytes2word(fe(b), f9(b), fd(b), fb(b));
01064 
              /* im_tab is indexed by b, not i, so entry b holds the word built
                 from b itself.  This fills the whole table only if b takes
                 every byte value once over the loop - presumably guaranteed
                 by the inverse S-box being a permutation. */
01065 #ifdef  IM1_SET                 /* tables for the inverse mix column operation  */
01066         im_tab[b] = w;
01067 #endif
01068 #ifdef  IM4_SET
01069         im_tab[0][b] = w;
01070         im_tab[1][b] = upr(w,1);
01071         im_tab[2][b] = upr(w,2);
01072         im_tab[3][b] = upr(w,3);
01073 #endif
01074 
01075 #ifdef  ISB_SET
01076         inv_s_box[i] = b;
01077 #endif
01078 #ifdef  IT1_SET                 /* tables for a normal decryption round */
01079         it_tab[i] = w;
01080 #endif
01081 #ifdef  IT4_SET
01082         it_tab[0][i] = w;
01083         it_tab[1][i] = upr(w,1);
01084         it_tab[2][i] = upr(w,2);
01085         it_tab[3][i] = upr(w,3);
01086 #endif
01087         w = bytes2word(b, 0, 0, 0);
01088 #ifdef  IL1_SET                 /* tables for last decryption round */
01089         il_tab[i] = w;
01090 #endif
01091 #ifdef  IL4_SET
01092         il_tab[0][i] = w;
01093         il_tab[1][i] = upr(w,1);
01094         il_tab[2][i] = upr(w,2);
01095         il_tab[3][i] = upr(w,3);
01096 #endif
01097     }
01098 
01099     tab_init = 1;
01100 }
01101 
01102 #endif
01103 
01104 /*----------------------------------------------------------------------
01105 |       key schedule
01106 +---------------------------------------------------------------------*/
01107 #if !defined(BLOCK_SIZE)
01108 
      /*  aes_blk_len: store the cipher block length in the context.
      
          blen  requested block length in bytes; 16, 24 and 32 are accepted
          cx    context whose n_blk field is written
      
          Returns aes_good and sets cx->n_blk to blen when the length is
          accepted; otherwise sets cx->n_blk to 0 and returns aes_bad.  On the
          dynamic-table path the tables are generated first if they have not
          been yet, whatever the value of blen.
      */
01109 static aes_rval aes_blk_len(unsigned int blen, aes_ctx cx[1])
01110 {
01111     unsigned int accepted = 0;      /* stays 0 unless blen passes the check */
01112 #if !defined(FIXED_TABLES)
01113     if(tab_init == 0)
01114         gen_tabs();
01115 #endif
01116 
01117     if(blen >= 16 && blen <= 32 && (blen & 7) == 0)
01118         accepted = blen;
01119 
01120     cx->n_blk = accepted;
01121     return accepted ? aes_good : aes_bad;
01122 }
01123 
01124 #endif
01125 
01126 /* Initialise the key schedule from the user supplied key. The key
01127    length is now specified in bytes - 16, 24 or 32 as appropriate.
01128    This corresponds to bit lengths of 128, 192 and 256 bits, and
01129    to Nk values of 4, 6 and 8 respectively.
01130 
01131    The following macros implement a single cycle in the key 
01132    schedule generation process. The number of cycles needed 
01133    for each cx->n_col and nk value is:
01134  
01135     nk =             4  5  6  7  8
01136     ------------------------------
01137     cx->n_col = 4   10  9  8  7  7
01138     cx->n_col = 5   14 11 10  9  9
01139     cx->n_col = 6   19 15 12 11 11
01140     cx->n_col = 7   24 19 16 13 13
01141     cx->n_col = 8   29 23 19 17 14
01142 */
01143 
      /*  One key schedule cycle for 4, 6 and 8 word keys.
      
          k  is the key schedule array and i the cycle number (0 based).  The
          macros read and update a local array named ss that holds the most
          recent key words, so they can only be used where ss is in scope.
          Cycle i writes the words that follow the nk * (i + 1) already
          present in k.  ls_box() is defined outside this section.
      
          keN  writes a full group of N words.
          kelN is the form used for the final cycle in the unrolled code:
               kel4 is identical to ke4, while kel6 and kel8 write only the
               first four words of the group.
          ke8 applies ls_box() a second time, to ss[3], before the fifth word.
      */
01144 #define ke4(k,i) \
01145 {   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
01146     k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
01147 }
01148 #define kel4(k,i) \
01149 {   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
01150     k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
01151 }
01152 
01153 #define ke6(k,i) \
01154 {   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
01155     k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
01156     k[6*(i)+10] = ss[4] ^= ss[3]; k[6*(i)+11] = ss[5] ^= ss[4]; \
01157 }
01158 #define kel6(k,i) \
01159 {   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
01160     k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
01161 }
01162 
01163 #define ke8(k,i) \
01164 {   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
01165     k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
01166     k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); k[8*(i)+13] = ss[5] ^= ss[4]; \
01167     k[8*(i)+14] = ss[6] ^= ss[5]; k[8*(i)+15] = ss[7] ^= ss[6]; \
01168 }
01169 #define kel8(k,i) \
01170 {   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
01171     k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
01172 }
01173 
01174 #if defined(ENCRYPTION_KEY_SCHEDULE)
01175 
      /*  aes_enc_key: expand a user key into the encryption key schedule.
      
          in_key  key bytes; klen of them are read
          klen    key length in bytes - must be 16, 24 or 32
          cx      context receiving the schedule (k_sch), the round count
                  (n_rnd) and the block length with bit 0 set in n_blk, the
                  flag aes_enc_blk() tests before encrypting
      
          Returns aes_good on success.  For any other klen, n_rnd is set to 0
          and aes_bad is returned.
      
          The key length is checked before any key byte is read and before
          the generic branch divides by (klen >> 2).  Previously the check sat
          in the switch default, so 16 key bytes were read regardless of klen
          and a klen below 4 divided by zero in the generic branch.
      */
01176 static aes_rval aes_enc_key(const unsigned char in_key[], unsigned int klen, aes_ctx cx[1])
01177 {   aes_32t    ss[8]; 
01178 
01179 #if !defined(FIXED_TABLES)
01180     if(!tab_init) gen_tabs();
01181 #endif
01182 
01183 #if !defined(BLOCK_SIZE)
01184     if(!cx->n_blk) cx->n_blk = 16;
01185 #else
01186     cx->n_blk = BLOCK_SIZE;
01187 #endif
01188     
01189     cx->n_blk = (cx->n_blk & ~3) | 1;
01190 
          /* reject unsupported key lengths before touching in_key */
          if(klen != 16 && klen != 24 && klen != 32)
          {
              cx->n_rnd = 0; return aes_bad;
          }

01191     cx->k_sch[0] = ss[0] = word_in(in_key     );
01192     cx->k_sch[1] = ss[1] = word_in(in_key +  4);
01193     cx->k_sch[2] = ss[2] = word_in(in_key +  8);
01194     cx->k_sch[3] = ss[3] = word_in(in_key + 12);
01195 
01196 #if (BLOCK_SIZE == 16) && (ENC_UNROLL != NONE)
01197 
          /* fixed 16 byte block: fully unrolled schedule, last cycle via kelN */
01198     switch(klen)
01199     {
01200     case 16:    ke4(cx->k_sch, 0); ke4(cx->k_sch, 1); 
01201                 ke4(cx->k_sch, 2); ke4(cx->k_sch, 3);
01202                 ke4(cx->k_sch, 4); ke4(cx->k_sch, 5); 
01203                 ke4(cx->k_sch, 6); ke4(cx->k_sch, 7);
01204                 ke4(cx->k_sch, 8); kel4(cx->k_sch, 9); 
01205                 cx->n_rnd = 10; break;
01206     case 24:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
01207                 cx->k_sch[5] = ss[5] = word_in(in_key + 20);
01208                 ke6(cx->k_sch, 0); ke6(cx->k_sch, 1); 
01209                 ke6(cx->k_sch, 2); ke6(cx->k_sch, 3);
01210                 ke6(cx->k_sch, 4); ke6(cx->k_sch, 5); 
01211                 ke6(cx->k_sch, 6); kel6(cx->k_sch, 7); 
01212                 cx->n_rnd = 12; break;
01213     case 32:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
01214                 cx->k_sch[5] = ss[5] = word_in(in_key + 20);
01215                 cx->k_sch[6] = ss[6] = word_in(in_key + 24);
01216                 cx->k_sch[7] = ss[7] = word_in(in_key + 28);
01217                 ke8(cx->k_sch, 0); ke8(cx->k_sch, 1); 
01218                 ke8(cx->k_sch, 2); ke8(cx->k_sch, 3);
01219                 ke8(cx->k_sch, 4); ke8(cx->k_sch, 5); 
01220                 kel8(cx->k_sch, 6); 
01221                 cx->n_rnd = 14; break;
          /* not reached after the check above; kept as a safeguard */
01222     default:    cx->n_rnd = 0; return aes_bad; 
01223     }
01224 #else
          /* generic path: rounds = max(key words, block words) + 6, and l is
             the number of schedule cycles needed to cover all round keys */
01225     {   aes_32t i, l;
01226         cx->n_rnd = ((klen >> 2) > nc ? (klen >> 2) : nc) + 6;
01227         l = (nc * cx->n_rnd + nc - 1) / (klen >> 2);
01228 
01229         switch(klen)
01230         {
01231         case 16:    for(i = 0; i < l; ++i)
01232                         ke4(cx->k_sch, i);
01233                     break;
01234         case 24:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
01235                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
01236                     for(i = 0; i < l; ++i)
01237                         ke6(cx->k_sch, i);
01238                     break;
01239         case 32:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
01240                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
01241                     cx->k_sch[6] = ss[6] = word_in(in_key + 24);
01242                     cx->k_sch[7] = ss[7] = word_in(in_key + 28);
01243                     for(i = 0; i < l; ++i)
01244                         ke8(cx->k_sch,  i);
01245                     break;
              /* not reached after the check above; kept as a safeguard */
01246         default:    cx->n_rnd = 0; return aes_bad; 
01247         }
01248     }
01249 #endif
01250 
01251     return aes_good;
01252 }
01253 
01254 #endif
01255 
01256 #if defined(DECRYPTION_KEY_SCHEDULE)
01257 
      /*  Key schedule cycles for the decryption schedule.
      
          ff(x) is inv_mcol(x) when decryption rounds use tables and the
          identity otherwise; d_vars is dec_imvars in the first case and empty
          in the second (both defined outside this section).
      
          As with the encryption macros, k is the schedule array, i the cycle
          number, and a local array ss must be in scope.  The unrolled code in
          aes_dec_key() uses kdfN for cycle 0, kdN for the middle cycles and
          kdlN for the last one:
      
            kdfN  stores ff() of every newly generated word;
            kdN   keeps the plain key words in ss and forms each stored word
                  by XORing a running ff()'d value with the word one group
                  back in k (presumably relying on ff being linear over
                  XOR - confirm);
            kdlN  stores the new words without ff().
      
          The 4 and 6 word forms use ss[4] and ss[6] respectively as scratch;
          kd8 declares its own temporary g.
      */
01258 #if (DEC_ROUND != NO_TABLES)
01259 #define d_vars  dec_imvars
01260 #define ff(x)   inv_mcol(x)
01261 #else
01262 #define ff(x)   (x)
01263 #define d_vars
01264 #endif
01265 
      /*  Two alternative sets of 4 word macros follow; '#if 1' selects the
          first and leaves the second uncompiled.  In the first set the
          expression 'i % 4' uses i without parentheses, so i should be a
          plain constant or identifier.
      */
01266 #if 1
01267 #define kdf4(k,i) \
01268 {   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; ss[1] = ss[1] ^ ss[3]; ss[2] = ss[2] ^ ss[3]; ss[3] = ss[3]; \
01269     ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; ss[i % 4] ^= ss[4]; \
01270     ss[4] ^= k[4*(i)];   k[4*(i)+4] = ff(ss[4]); ss[4] ^= k[4*(i)+1]; k[4*(i)+5] = ff(ss[4]); \
01271     ss[4] ^= k[4*(i)+2]; k[4*(i)+6] = ff(ss[4]); ss[4] ^= k[4*(i)+3]; k[4*(i)+7] = ff(ss[4]); \
01272 }
01273 #define kd4(k,i) \
01274 {   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
01275     k[4*(i)+4] = ss[4] ^= k[4*(i)]; k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
01276     k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
01277 }
01278 #define kdl4(k,i) \
01279 {   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; ss[i % 4] ^= ss[4]; \
01280     k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; k[4*(i)+5] = ss[1] ^ ss[3]; \
01281     k[4*(i)+6] = ss[0]; k[4*(i)+7] = ss[1]; \
01282 }
01283 #else
01284 #define kdf4(k,i) \
01285 {   ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+ 4] = ff(ss[0]); ss[1] ^= ss[0]; k[4*(i)+ 5] = ff(ss[1]); \
01286     ss[2] ^= ss[1]; k[4*(i)+ 6] = ff(ss[2]); ss[3] ^= ss[2]; k[4*(i)+ 7] = ff(ss[3]); \
01287 }
01288 #define kd4(k,i) \
01289 {   ss[4] = ls_box(ss[3],3) ^ rcon_tab[i]; \
01290     ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[4*(i)+ 4] = ss[4] ^= k[4*(i)]; \
01291     ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[4] ^= k[4*(i)+ 1]; \
01292     ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[4] ^= k[4*(i)+ 2]; \
01293     ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[4] ^= k[4*(i)+ 3]; \
01294 }
01295 #define kdl4(k,i) \
01296 {   ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+ 4] = ss[0]; ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[1]; \
01297     ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[2]; ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[3]; \
01298 }
01299 #endif
01300 
      /*  6 word key: kdl6 writes only four words, like kel6 */
01301 #define kdf6(k,i) \
01302 {   ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 6] = ff(ss[0]); ss[1] ^= ss[0]; k[6*(i)+ 7] = ff(ss[1]); \
01303     ss[2] ^= ss[1]; k[6*(i)+ 8] = ff(ss[2]); ss[3] ^= ss[2]; k[6*(i)+ 9] = ff(ss[3]); \
01304     ss[4] ^= ss[3]; k[6*(i)+10] = ff(ss[4]); ss[5] ^= ss[4]; k[6*(i)+11] = ff(ss[5]); \
01305 }
01306 #define kd6(k,i) \
01307 {   ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \
01308     ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
01309     ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
01310     ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
01311     ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
01312     ss[4] ^= ss[3]; k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
01313     ss[5] ^= ss[4]; k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
01314 }
01315 #define kdl6(k,i) \
01316 {   ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 6] = ss[0]; ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[1]; \
01317     ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[2]; ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[3]; \
01318 }
01319 
      /*  8 word key: a second ls_box() step is applied before the fifth word;
          kdl8 writes only four words, like kel8 */
01320 #define kdf8(k,i) \
01321 {   ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 8] = ff(ss[0]); ss[1] ^= ss[0]; k[8*(i)+ 9] = ff(ss[1]); \
01322     ss[2] ^= ss[1]; k[8*(i)+10] = ff(ss[2]); ss[3] ^= ss[2]; k[8*(i)+11] = ff(ss[3]); \
01323     ss[4] ^= ls_box(ss[3],0); k[8*(i)+12] = ff(ss[4]); ss[5] ^= ss[4]; k[8*(i)+13] = ff(ss[5]); \
01324     ss[6] ^= ss[5]; k[8*(i)+14] = ff(ss[6]); ss[7] ^= ss[6]; k[8*(i)+15] = ff(ss[7]); \
01325 }
01326 #define kd8(k,i) \
01327 {   aes_32t g = ls_box(ss[7],3) ^ rcon_tab[i]; \
01328     ss[0] ^= g; g = ff(g); k[8*(i)+ 8] = g ^= k[8*(i)]; \
01329     ss[1] ^= ss[0]; k[8*(i)+ 9] = g ^= k[8*(i)+ 1]; \
01330     ss[2] ^= ss[1]; k[8*(i)+10] = g ^= k[8*(i)+ 2]; \
01331     ss[3] ^= ss[2]; k[8*(i)+11] = g ^= k[8*(i)+ 3]; \
01332     g = ls_box(ss[3],0); \
01333     ss[4] ^= g; g = ff(g); k[8*(i)+12] = g ^= k[8*(i)+ 4]; \
01334     ss[5] ^= ss[4]; k[8*(i)+13] = g ^= k[8*(i)+ 5]; \
01335     ss[6] ^= ss[5]; k[8*(i)+14] = g ^= k[8*(i)+ 6]; \
01336     ss[7] ^= ss[6]; k[8*(i)+15] = g ^= k[8*(i)+ 7]; \
01337 }
01338 #define kdl8(k,i) \
01339 {   ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 8] = ss[0]; ss[1] ^= ss[0]; k[8*(i)+ 9] = ss[1]; \
01340     ss[2] ^= ss[1]; k[8*(i)+10] = ss[2]; ss[3] ^= ss[2]; k[8*(i)+11] = ss[3]; \
01341 }
01342 
      /*  aes_dec_key: expand a user key into the decryption key schedule.
      
          in_key  key bytes; klen of them are read
          klen    key length in bytes - must be 16, 24 or 32
          cx      context receiving the schedule (k_sch), the round count
                  (n_rnd) and the block length with bit 1 set in n_blk
                  (presumably the flag the decryption routine tests)
      
          Returns aes_good on success.  For any other klen, n_rnd is set to 0
          and aes_bad is returned.
      
          The key length is checked before any key byte is read and before
          the generic branch divides by (klen >> 2).  Previously the check sat
          in the switch default, so 16 key bytes were read regardless of klen
          and a klen below 4 divided by zero in the generic branch.
      */
01343 static aes_rval aes_dec_key(const unsigned char in_key[], unsigned int klen, aes_ctx cx[1])
01344 {   aes_32t    ss[8]; 
01345     d_vars
01346 
01347 #if !defined(FIXED_TABLES)
01348     if(!tab_init) gen_tabs();
01349 #endif
01350 
01351 #if !defined(BLOCK_SIZE)
01352     if(!cx->n_blk) cx->n_blk = 16;
01353 #else
01354     cx->n_blk = BLOCK_SIZE;
01355 #endif
01356 
01357     cx->n_blk = (cx->n_blk & ~3) | 2;
01358 
          /* reject unsupported key lengths before touching in_key */
          if(klen != 16 && klen != 24 && klen != 32)
          {
              cx->n_rnd = 0; return aes_bad;
          }

01359     cx->k_sch[0] = ss[0] = word_in(in_key     );
01360     cx->k_sch[1] = ss[1] = word_in(in_key +  4);
01361     cx->k_sch[2] = ss[2] = word_in(in_key +  8);
01362     cx->k_sch[3] = ss[3] = word_in(in_key + 12);
01363 
01364 #if (BLOCK_SIZE == 16) && (DEC_UNROLL != NONE)
01365 
          /* fixed 16 byte block: unrolled schedule using the kdf / kd / kdl
             macros, which apply ff() as the words are generated */
01366     switch(klen)
01367     {
01368     case 16:    kdf4(cx->k_sch, 0); kd4(cx->k_sch, 1); 
01369                 kd4(cx->k_sch, 2); kd4(cx->k_sch, 3);
01370                 kd4(cx->k_sch, 4); kd4(cx->k_sch, 5); 
01371                 kd4(cx->k_sch, 6); kd4(cx->k_sch, 7);
01372                 kd4(cx->k_sch, 8); kdl4(cx->k_sch, 9); 
01373                 cx->n_rnd = 10; break;
01374     case 24:    cx->k_sch[4] = ff(ss[4] = word_in(in_key + 16));
01375                 cx->k_sch[5] = ff(ss[5] = word_in(in_key + 20));
01376                 kdf6(cx->k_sch, 0); kd6(cx->k_sch, 1); 
01377                 kd6(cx->k_sch, 2); kd6(cx->k_sch, 3);
01378                 kd6(cx->k_sch, 4); kd6(cx->k_sch, 5); 
01379                 kd6(cx->k_sch, 6); kdl6(cx->k_sch, 7); 
01380                 cx->n_rnd = 12; break;
01381     case 32:    cx->k_sch[4] = ff(ss[4] = word_in(in_key + 16));
01382                 cx->k_sch[5] = ff(ss[5] = word_in(in_key + 20));
01383                 cx->k_sch[6] = ff(ss[6] = word_in(in_key + 24));
01384                 cx->k_sch[7] = ff(ss[7] = word_in(in_key + 28));
01385                 kdf8(cx->k_sch, 0); kd8(cx->k_sch, 1); 
01386                 kd8(cx->k_sch, 2); kd8(cx->k_sch, 3);
01387                 kd8(cx->k_sch, 4); kd8(cx->k_sch, 5); 
01388                 kdl8(cx->k_sch, 6); 
01389                 cx->n_rnd = 14; break;
          /* not reached after the check above; kept as a safeguard */
01390     default:    cx->n_rnd = 0; return aes_bad; 
01391     }
01392 #else
          /* generic path: build the schedule with the encryption macros, then
             (when decryption uses tables) apply inv_mcol() to the round keys
             between the first and the last */
01393     {   aes_32t i, l;
01394         cx->n_rnd = ((klen >> 2) > nc ? (klen >> 2) : nc) + 6;
01395         l = (nc * cx->n_rnd + nc - 1) / (klen >> 2);
01396 
01397         switch(klen)
01398         {
01399         case 16: 
01400                     for(i = 0; i < l; ++i)
01401                         ke4(cx->k_sch, i);
01402                     break;
01403         case 24:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
01404                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
01405                     for(i = 0; i < l; ++i)
01406                         ke6(cx->k_sch, i);
01407                     break;
01408         case 32:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
01409                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
01410                     cx->k_sch[6] = ss[6] = word_in(in_key + 24);
01411                     cx->k_sch[7] = ss[7] = word_in(in_key + 28);
01412                     for(i = 0; i < l; ++i)
01413                         ke8(cx->k_sch,  i);
01414                     break;
              /* not reached after the check above; kept as a safeguard */
01415         default:    cx->n_rnd = 0; return aes_bad; 
01416         }
01417 #if (DEC_ROUND != NO_TABLES)
01418         for(i = nc; i < nc * cx->n_rnd; ++i)
01419             cx->k_sch[i] = inv_mcol(cx->k_sch[i]);
01420 #endif
01421     }
01422 #endif
01423 
01424     return aes_good;
01425 }
01426 
01427 #endif
01428 
01429 /*----------------------------------------------------------------------
01430 |       cipher
01431 +---------------------------------------------------------------------*/
01432 #define unused  77  /* Sunset Strip */
      /*  'unused' is an arbitrary placeholder value: the b04 .. b17 names
          defined further down expand to it so that references to state
          variables that do not exist for the chosen block size still compile.
      */
01433 
      /*  si(y,x,k,c): load column c of state y from byte buffer x (4 bytes at
                       offset 4 * c, read with word_in) XORed with key word
                       k[c].
          so(y,x,c):   store column c of state x to byte buffer y at offset
                       4 * c with word_out.
          s(), word_in() and word_out() are defined outside this section.
      */
01434 #define si(y,x,k,c) s(y,c) = word_in(x + 4 * c) ^ k[c]
01435 #define so(y,x,c)   word_out(y + 4 * c, s(x,c))
01436 
      /*  State handling macros, specialised on BLOCK_SIZE.
      
          locals(y,x)      declarator list for the two state buffers: two
                           arrays when ARRAYS is defined, otherwise individual
                           variables x0.., y0..
          l_copy(y,x)      copy every column of state x to state y
          state_in(y,x,k)  si() for every column: load the input block and
                           XOR in the first round key
          state_out(y,x)   so() for every column: write the state out
          round(rm,y,x,k)  apply the round macro rm to every column
      
          Block sizes of 16, 24 and 32 bytes use 4, 6 and 8 columns.  Any
          other value (including BLOCK_SIZE being undefined) declares 8
          columns and picks the number to process at run time by switching on
          nc, which is defined outside this section.
      */
01437 #if BLOCK_SIZE == 16
01438 
01439 #if defined(ARRAYS)
01440 #define locals(y,x)     x[4],y[4]
01441 #else
01442 #define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
01443  /* 
01444    the following defines prevent the compiler requiring the declaration
01445    of generated but unused variables in the fwd_var and inv_var macros
01446  */
01447 #define b04 unused
01448 #define b05 unused
01449 #define b06 unused
01450 #define b07 unused
01451 #define b14 unused
01452 #define b15 unused
01453 #define b16 unused
01454 #define b17 unused
01455 #endif
01456 #define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
01457                         s(y,2) = s(x,2); s(y,3) = s(x,3);
01458 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
01459 #define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
01460 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
01461 
01462 #elif BLOCK_SIZE == 24
01463 
01464 #if defined(ARRAYS)
01465 #define locals(y,x)     x[6],y[6]
01466 #else
01467 #define locals(y,x)     x##0,x##1,x##2,x##3,x##4,x##5, \
01468                         y##0,y##1,y##2,y##3,y##4,y##5
      /* columns 6 and 7 do not exist for a 24 byte block */
01469 #define b06 unused
01470 #define b07 unused
01471 #define b16 unused
01472 #define b17 unused
01473 #endif
01474 #define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
01475                         s(y,2) = s(x,2); s(y,3) = s(x,3); \
01476                         s(y,4) = s(x,4); s(y,5) = s(x,5);
01477 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
01478                         si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
01479 #define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); \
01480                         so(y,x,3); so(y,x,4); so(y,x,5)
01481 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
01482                         rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
01483 #else
01484 
01485 #if defined(ARRAYS)
01486 #define locals(y,x)     x[8],y[8]
01487 #else
01488 #define locals(y,x)     x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
01489                         y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
01490 #endif
01491 #define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
01492                         s(y,2) = s(x,2); s(y,3) = s(x,3); \
01493                         s(y,4) = s(x,4); s(y,5) = s(x,5); \
01494                         s(y,6) = s(x,6); s(y,7) = s(x,7);
01495 
01496 #if BLOCK_SIZE == 32
01497 
01498 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
01499                         si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
01500 #define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
01501                         so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
01502 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
01503                         rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
01504 #else
01505 
      /*  Variable block size: the switch cases fall through on purpose, so
          nc == 8 handles columns 7..0, nc == 6 columns 5..0 and nc == 4
          columns 3..0.  Other values of nc do nothing.
      */
01506 #define state_in(y,x,k) \
01507 switch(nc) \
01508 {   case 8: si(y,x,k,7); si(y,x,k,6); \
01509     case 6: si(y,x,k,5); si(y,x,k,4); \
01510     case 4: si(y,x,k,3); si(y,x,k,2); \
01511             si(y,x,k,1); si(y,x,k,0); \
01512 }
01513 
01514 #define state_out(y,x) \
01515 switch(nc) \
01516 {   case 8: so(y,x,7); so(y,x,6); \
01517     case 6: so(y,x,5); so(y,x,4); \
01518     case 4: so(y,x,3); so(y,x,2); \
01519             so(y,x,1); so(y,x,0); \
01520 }
01521 
01522 #if defined(FAST_VARIABLE)
01523 
      /*  FAST_VARIABLE form: every case lists all of its columns and ends
          with break instead of falling through.
      */
01524 #define round(rm,y,x,k) \
01525 switch(nc) \
01526 {   case 8: rm(y,x,k,7); rm(y,x,k,6); \
01527             rm(y,x,k,5); rm(y,x,k,4); \
01528             rm(y,x,k,3); rm(y,x,k,2); \
01529             rm(y,x,k,1); rm(y,x,k,0); \
01530             break; \
01531     case 6: rm(y,x,k,5); rm(y,x,k,4); \
01532             rm(y,x,k,3); rm(y,x,k,2); \
01533             rm(y,x,k,1); rm(y,x,k,0); \
01534             break; \
01535     case 4: rm(y,x,k,3); rm(y,x,k,2); \
01536             rm(y,x,k,1); rm(y,x,k,0); \
01537             break; \
01538 }
01539 #else
01540 
      /*  compact form: same fall-through layout as state_in / state_out */
01541 #define round(rm,y,x,k) \
01542 switch(nc) \
01543 {   case 8: rm(y,x,k,7); rm(y,x,k,6); \
01544     case 6: rm(y,x,k,5); rm(y,x,k,4); \
01545     case 4: rm(y,x,k,3); rm(y,x,k,2); \
01546             rm(y,x,k,1); rm(y,x,k,0); \
01547 }
01548 
01549 #endif
01550 
01551 #endif
01552 #endif
01553 
01554 #if defined(ENCRYPTION)
01555 
01556 /* I am grateful to Frank Yellin for the following construction
01557    (and that for decryption) which, given the column (c) of the 
01558    output state variable, gives the input state variables which 
01559    are needed in its computation for each row (r) of the state.
01560 
01561    For the fixed block size options, compilers should be able to 
01562    reduce this complex expression (and the equivalent one for 
01563    decryption) to a static variable reference at compile time. 
01564    But for variable block size code, there will be some limbs on
01565    which conditional clauses will be returned.
01566 */
01567 
01568 /* fwd_var(x,r,c): x = input state name, r = row (0 to 3), c = column of
01569    the output word being computed.  The result is the input state column
01570    that supplies row r, namely s(x, (c + offset) mod nc), where offset is
         0, 1, 2, 3 for rows 0 to 3 when nc is 4 or 6 and 0, 1, 3, 4 when nc
         is 8.  The nested conditionals treat any nc other than 4 or 8 as 6,
         and the c == 6 and c == 7 limbs are written for nc == 8 only.  Rows
         above 2 all take the final (row 3) limb.  nc and s() are defined
         outside this section.
      */
01571 
01572 #define fwd_var(x,r,c)\
01573  ( r == 0 ?           \
01574     ( c == 0 ? s(x,0) \
01575     : c == 1 ? s(x,1) \
01576     : c == 2 ? s(x,2) \
01577     : c == 3 ? s(x,3) \
01578     : c == 4 ? s(x,4) \
01579     : c == 5 ? s(x,5) \
01580     : c == 6 ? s(x,6) \
01581     :          s(x,7))\
01582  : r == 1 ?           \
01583     ( c == 0 ? s(x,1) \
01584     : c == 1 ? s(x,2) \
01585     : c == 2 ? s(x,3) \
01586     : c == 3 ? nc == 4 ? s(x,0) : s(x,4) \
01587     : c == 4 ? s(x,5) \
01588     : c == 5 ? nc == 8 ? s(x,6) : s(x,0) \
01589     : c == 6 ? s(x,7) \
01590     :          s(x,0))\
01591  : r == 2 ?           \
01592     ( c == 0 ? nc == 8 ? s(x,3) : s(x,2) \
01593     : c == 1 ? nc == 8 ? s(x,4) : s(x,3) \
01594     : c == 2 ? nc == 4 ? s(x,0) : nc == 8 ? s(x,5) : s(x,4) \
01595     : c == 3 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,6) : s(x,5) \
01596     : c == 4 ? nc == 8 ? s(x,7) : s(x,0) \
01597     : c == 5 ? nc == 8 ? s(x,0) : s(x,1) \
01598     : c == 6 ? s(x,1) \
01599     :          s(x,2))\
01600  :                    \
01601     ( c == 0 ? nc == 8 ? s(x,4) : s(x,3) \
01602     : c == 1 ? nc == 4 ? s(x,0) : nc == 8 ? s(x,5) : s(x,4) \
01603     : c == 2 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,6) : s(x,5) \
01604     : c == 3 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,7) : s(x,0) \
01605     : c == 4 ? nc == 8 ? s(x,0) : s(x,1) \
01606     : c == 5 ? nc == 8 ? s(x,1) : s(x,2) \
01607     : c == 6 ? s(x,2) \
01608     :          s(x,3)))
01609 
/* fwd_rnd(y,x,k,c): produce column c of output state y for one ordinary
   forward round, from input state x and the round-key words k. The
   build picks one of three forms; each XORs the result with (k)[c]:
     FT4_SET   - four_tables() over ft_tab
     FT1_SET   - one_table() over ft_tab, with upr
     otherwise - no_table() over s_box, then fwd_mcol()
   All three use fwd_var to pick the input words.
   The two table forms also redefine dec_fmvars as empty; only the
   fallback form calls fwd_mcol(), whose variables dec_fmvars declares. */
01610 #if defined(FT4_SET)
01611 #undef  dec_fmvars
01612 #define dec_fmvars
01613 #define fwd_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
01614 #elif defined(FT1_SET)
01615 #undef  dec_fmvars
01616 #define dec_fmvars
01617 #define fwd_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
01618 #else
/* no round tables: substitute via s_box, mix with fwd_mcol(), then add the key word */
01619 #define fwd_rnd(y,x,k,c)    s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
01620 #endif
01621 
/* fwd_lrnd(y,x,k,c): the forward-round variant that aes_enc_blk applies
   as its closing round. It has the same shape as fwd_rnd but uses the
   fl_tab tables (FL4_SET / FL1_SET, the latter with ups); the table-free
   fallback is no_table() over s_box without the fwd_mcol() step. Every
   form XORs the result with (k)[c]. */
01622 #if defined(FL4_SET)
01623 #define fwd_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
01624 #elif defined(FL1_SET)
01625 #define fwd_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
01626 #else
01627 #define fwd_lrnd(y,x,k,c)   s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
01628 #endif
01629 
/* aes_enc_blk: encrypt a single block.
     in_blk  - input block, read through state_in()
     out_blk - receives the result, written through state_out()
     cx      - context supplying the key words k_sch, the round count
               n_rnd and the flag word n_blk
   Returns aes_bad, without writing out_blk, when bit 0 of cx->n_blk is
   clear; otherwise runs the rounds and returns aes_good.
   NOTE(review): bit 0 presumably records that an encryption key
   schedule has been set up - confirm against aes_enc_key.
   Three compile-time variants (ENC_UNROLL) perform the same sequence of
   rounds: n_rnd - 1 applications of fwd_rnd followed by one fwd_lrnd,
   advancing the key pointer by nc words per round. */
01630 static aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
01631 {   aes_32t        locals(b0, b1);
01632     const aes_32t  *kp = cx->k_sch;
01633     dec_fmvars  /* declare variables for fwd_mcol() if needed */
01634 
01635     if(!(cx->n_blk & 1)) return aes_bad;
01636 
    /* load the input block into b0 using the first key words
       (presumably the initial key XOR - confirm in state_in) */
01637     state_in(b0, in_blk, kp); 
01638 
01639 #if (ENC_UNROLL == FULL)
01640 
    /* re-base kp so the ten rounds shared by every key size are
       addressed as kp .. kp + 9 * nc; for n_rnd == 10 this is k_sch + nc */
01641     kp += (cx->n_rnd - 9) * nc;
01642 
    /* the cases fall through on purpose: 14 rounds run four extra
       rounds first, 12 rounds run two extra, then the common ten.
       NOTE(review): there is no default label, so any other n_rnd
       performs no rounds at all yet the function still returns aes_good. */
01643     switch(cx->n_rnd)
01644     {
01645     case 14:    round(fwd_rnd,  b1, b0, kp - 4 * nc); 
01646                 round(fwd_rnd,  b0, b1, kp - 3 * nc);
01647     case 12:    round(fwd_rnd,  b1, b0, kp - 2 * nc); 
01648                 round(fwd_rnd,  b0, b1, kp -     nc);
01649     case 10:    round(fwd_rnd,  b1, b0, kp         );             
01650                 round(fwd_rnd,  b0, b1, kp +     nc);
01651                 round(fwd_rnd,  b1, b0, kp + 2 * nc); 
01652                 round(fwd_rnd,  b0, b1, kp + 3 * nc);
01653                 round(fwd_rnd,  b1, b0, kp + 4 * nc); 
01654                 round(fwd_rnd,  b0, b1, kp + 5 * nc);
01655                 round(fwd_rnd,  b1, b0, kp + 6 * nc); 
01656                 round(fwd_rnd,  b0, b1, kp + 7 * nc);
01657                 round(fwd_rnd,  b1, b0, kp + 8 * nc);
01658                 round(fwd_lrnd, b0, b1, kp + 9 * nc);
01659     }
01660 #else
01661     
01662 #if (ENC_UNROLL == PARTIAL)
    /* two rounds per iteration, ping-ponging between b0 and b1:
       2 * ((n_rnd / 2) - 1) rounds in the loop, one more fwd_rnd after
       it, and the closing fwd_lrnd below - n_rnd rounds in total when
       n_rnd is even */
01663     {   aes_32t    rnd;
01664         for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd)
01665         {
01666             kp += nc;
01667             round(fwd_rnd, b1, b0, kp); 
01668             kp += nc;
01669             round(fwd_rnd, b0, b1, kp); 
01670         }
01671         kp += nc;
01672         round(fwd_rnd,  b1, b0, kp);
01673 #else
    /* one round per iteration; p0 (input) and p1 (output) are swapped
       after each round. The closing round below names b1 and b0
       directly, which matches p0 and p1 only after an odd number of
       swaps, i.e. when n_rnd is even. */
01674     {   aes_32t    rnd, *p0 = b0, *p1 = b1, *pt;
01675         for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
01676         {
01677             kp += nc;
01678             round(fwd_rnd, p1, p0, kp); 
01679             pt = p0, p0 = p1, p1 = pt;
01680         }
01681 #endif
        /* closing round, shared by both looped variants: b1 -> b0 */
01682         kp += nc;
01683         round(fwd_lrnd, b0, b1, kp);
01684     }
01685 #endif
01686 
    /* in every variant the final state is left in b0 */
01687     state_out(out_blk, b0);
01688     return aes_good;
01689 }
01690 
01691 #endif
01692 
01693 #if defined(DECRYPTION)
01694 
/* inv_var(x,r,c): choose the word of input state x that supplies byte
   row r of output column c in an inverse round; it is the counterpart
   of fwd_var with the shifts applied in the opposite direction.
   Reading the table, the selected word is s(x, (c - shift) mod n):
     row 0 : shift 0, i.e. s(x,c) itself
     row 1 : shift 1
     row 2 : shift 3 when nc == 8, otherwise 2
     row 3 : shift 4 when nc == 8, otherwise 3
   with n = 4 in the nc == 4 branches, n = 8 in the nc == 8 branches and
   n = 6 in the remaining branches. Any r other than 0, 1 or 2 lands in
   the final (row 3) arm. nc is defined outside this section (presumably
   the number of 32-bit state columns - confirm). Entries for columns
   beyond the block width only keep the conditional chain total;
   presumably they are never selected. */
01695 #define inv_var(x,r,c) \
01696  ( r == 0 ?           \
01697     ( c == 0 ? s(x,0) \
01698     : c == 1 ? s(x,1) \
01699     : c == 2 ? s(x,2) \
01700     : c == 3 ? s(x,3) \
01701     : c == 4 ? s(x,4) \
01702     : c == 5 ? s(x,5) \
01703     : c == 6 ? s(x,6) \
01704     :          s(x,7))\
01705  : r == 1 ?           \
01706     ( c == 0 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,7) : s(x,5) \
01707     : c == 1 ? s(x,0) \
01708     : c == 2 ? s(x,1) \
01709     : c == 3 ? s(x,2) \
01710     : c == 4 ? s(x,3) \
01711     : c == 5 ? s(x,4) \
01712     : c == 6 ? s(x,5) \
01713     :          s(x,6))\
01714  : r == 2 ?           \
01715     ( c == 0 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,5) : s(x,4) \
01716     : c == 1 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,6) : s(x,5) \
01717     : c == 2 ? nc == 8 ? s(x,7) : s(x,0) \
01718     : c == 3 ? nc == 8 ? s(x,0) : s(x,1) \
01719     : c == 4 ? nc == 8 ? s(x,1) : s(x,2) \
01720     : c == 5 ? nc == 8 ? s(x,2) : s(x,3) \
01721     : c == 6 ? s(x,3) \
01722     :          s(x,4))\
01723  :                    \
01724     ( c == 0 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,4) : s(x,3) \
01725     : c == 1 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,5) : s(x,4) \
01726     : c == 2 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,6) : s(x,5) \
01727     : c == 3 ? nc == 8 ? s(x,7) : s(x,0) \
01728     : c == 4 ? nc == 8 ? s(x,0) : s(x,1) \
01729     : c == 5 ? nc == 8 ? s(x,1) : s(x,2) \
01730     : c == 6 ? s(x,2) \
01731     :          s(x,3)))
01732 
/* inv_rnd(y,x,k,c): produce column c of output state y for one ordinary
   inverse round, from input state x and the round-key words k. The
   build picks one of three forms:
     IT4_SET   - four_tables() over it_tab, XORed with (k)[c]
     IT1_SET   - one_table() over it_tab with upr, XORed with (k)[c]
     otherwise - no_table() over inv_s_box, XORed with (k)[c], and only
                 then passed through inv_mcol()
   Note the fallback adds the key word inside inv_mcol(), unlike the
   forward fallback, which adds it after fwd_mcol().
   The two table forms also redefine dec_imvars as empty; only the
   fallback form calls inv_mcol(), whose variables dec_imvars declares. */
01733 #if defined(IT4_SET)
01734 #undef  dec_imvars
01735 #define dec_imvars
01736 #define inv_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
01737 #elif defined(IT1_SET)
01738 #undef  dec_imvars
01739 #define dec_imvars
01740 #define inv_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
01741 #else
01742 #define inv_rnd(y,x,k,c)    s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
01743 #endif
01744 
/* inv_lrnd(y,x,k,c): the inverse-round variant that aes_dec_blk applies
   as its closing round. It has the same shape as inv_rnd but uses the
   il_tab tables (IL4_SET / IL1_SET, the latter with ups); the table-free
   fallback is no_table() over inv_s_box without the inv_mcol() step.
   Every form XORs the result with (k)[c]. */
01745 #if defined(IL4_SET)
01746 #define inv_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
01747 #elif defined(IL1_SET)
01748 #define inv_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
01749 #else
01750 #define inv_lrnd(y,x,k,c)   s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
01751 #endif
01752 
/* aes_dec_blk: decrypt a single block.
     in_blk  - input block, read through state_in()
     out_blk - receives the result, written through state_out()
     cx      - context supplying the key words k_sch, the round count
               n_rnd and the flag word n_blk
   Returns aes_bad, without writing out_blk, when the bit with value 2
   in cx->n_blk is clear; otherwise runs the rounds and returns aes_good.
   NOTE(review): that bit presumably records that a decryption key
   schedule has been set up - confirm against the key-setup code.
   The key words are consumed from the end of the schedule backwards:
   kp starts at k_sch + nc * n_rnd and moves down by nc words per round.
   NOTE(review): this function sits inside #if defined(DECRYPTION); the
   build-options block at the top of the file defines ENCRYPTION but not
   DECRYPTION, so it is presumably compiled out - confirm. */
01753 static aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
01754 {   aes_32t        locals(b0, b1);
01755     const aes_32t  *kp = cx->k_sch + nc * cx->n_rnd;
01756     dec_imvars  /* declare variables for inv_mcol() if needed */
01757 
01758     if(!(cx->n_blk & 2)) return aes_bad;
01759 
    /* load the input block into b0 using the last key words
       (presumably the initial key XOR - confirm in state_in) */
01760     state_in(b0, in_blk, kp);
01761 
01762 #if (DEC_UNROLL == FULL)
01763 
    /* re-base kp so the ten rounds shared by every key size are
       addressed as kp down to kp - 9 * nc, the latter being k_sch itself */
01764     kp = cx->k_sch + 9 * nc;
    /* the cases fall through on purpose: 14 rounds run four extra
       rounds first, 12 rounds run two extra, then the common ten.
       NOTE(review): there is no default label, so any other n_rnd
       performs no rounds at all yet the function still returns aes_good. */
01765     switch(cx->n_rnd)
01766     {
01767     case 14:    round(inv_rnd,  b1, b0, kp + 4 * nc);
01768                 round(inv_rnd,  b0, b1, kp + 3 * nc);
01769     case 12:    round(inv_rnd,  b1, b0, kp + 2 * nc);
01770                 round(inv_rnd,  b0, b1, kp + nc    );
01771     case 10:    round(inv_rnd,  b1, b0, kp         );             
01772                 round(inv_rnd,  b0, b1, kp -     nc);
01773                 round(inv_rnd,  b1, b0, kp - 2 * nc); 
01774                 round(inv_rnd,  b0, b1, kp - 3 * nc);
01775                 round(inv_rnd,  b1, b0, kp - 4 * nc); 
01776                 round(inv_rnd,  b0, b1, kp - 5 * nc);
01777                 round(inv_rnd,  b1, b0, kp - 6 * nc); 
01778                 round(inv_rnd,  b0, b1, kp - 7 * nc);
01779                 round(inv_rnd,  b1, b0, kp - 8 * nc);
01780                 round(inv_lrnd, b0, b1, kp - 9 * nc);
01781     }
01782 #else
01783     
01784 #if (DEC_UNROLL == PARTIAL)
    /* two rounds per iteration, ping-ponging between b0 and b1:
       2 * ((n_rnd / 2) - 1) rounds in the loop, one more inv_rnd after
       it, and the closing inv_lrnd below - n_rnd rounds in total when
       n_rnd is even */
01785     {   aes_32t    rnd;
01786         for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd)
01787         {
01788             kp -= nc; 
01789             round(inv_rnd, b1, b0, kp); 
01790             kp -= nc; 
01791             round(inv_rnd, b0, b1, kp); 
01792         }
01793         kp -= nc;
01794         round(inv_rnd, b1, b0, kp);
01795 #else
    /* one round per iteration; p0 (input) and p1 (output) are swapped
       after each round. The closing round below names b1 and b0
       directly, which matches p0 and p1 only after an odd number of
       swaps, i.e. when n_rnd is even. */
01796     {   aes_32t    rnd, *p0 = b0, *p1 = b1, *pt;
01797         for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
01798         {
01799             kp -= nc;
01800             round(inv_rnd, p1, p0, kp); 
01801             pt = p0, p0 = p1, p1 = pt;
01802         }
01803 #endif
        /* closing round, shared by both looped variants: b1 -> b0 */
01804         kp -= nc;
01805         round(inv_lrnd, b0, b1, kp);
01806     }
01807 #endif
01808 
    /* in every variant the final state is left in b0 */
01809     state_out(out_blk, b0);
01810     return aes_good;
01811 }
01812 
01813 #endif
01814 
01815 /*----------------------------------------------------------------------
01816 |       AP4_AesBlockCipher::AP4_AesBlockCipher
|
|       Initialises m_Context by handing the caller's key to
|       aes_enc_key() with a key length of AP4_AES_KEY_LENGTH.
|       The status returned by aes_enc_key() is not inspected.
01817 +---------------------------------------------------------------------*/
01818 AP4_AesBlockCipher::AP4_AesBlockCipher(const AP4_UI08* key)
01819 {
01820     static_cast<void>(aes_enc_key(key, AP4_AES_KEY_LENGTH, &this->m_Context));
01821 }
01822 
01823 /*----------------------------------------------------------------------
01824 |       AP4_AesBlockCipher::~AP4_AesBlockCipher
|
|       Empty destructor: it performs no explicit work.
01825 +---------------------------------------------------------------------*/
01826 AP4_AesBlockCipher::~AP4_AesBlockCipher()
01827 {
01828 }
01829 
01830 /*----------------------------------------------------------------------
01831 |       AP4_AesBlockCipher::EncryptBlock
|
|       Passes block_in through aes_enc_blk() with this object's
|       context and stores the result in block_out.
|       Returns AP4_SUCCESS when aes_enc_blk() reports aes_good and
|       AP4_FAILURE for any other status.
01832 +---------------------------------------------------------------------*/
01833 AP4_Result
01834 AP4_AesBlockCipher::EncryptBlock(const AP4_UI08* block_in, AP4_UI08* block_out)
01835 {
01836     /* anything other than aes_good is reported as a plain failure */
01837     if (aes_enc_blk(block_in, block_out, &m_Context) != aes_good) return AP4_FAILURE;
01838     return AP4_SUCCESS;
01839 }
01840 

Generated on Tue Dec 13 14:47:20 2005 for guliverkli by  doxygen 1.4.5