OpenSSL 1.0.1c
wp_block.c
#include "wp_locl.h"
#include <string.h>

typedef unsigned char u8;
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#elif defined(__arch64__)
typedef unsigned long u64;
#else
typedef unsigned long long u64;
#endif
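
/* Type-selection note (added commentary, not in the original source):
 * pre-C99 MSVC spells the 64-bit type "unsigned __int64"; on LP64
 * targets such as 64-bit SPARC (__arch64__) a plain "unsigned long"
 * is already 64 bits; everything else falls back to
 * "unsigned long long". */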

#define ROUNDS 10

#define STRICT_ALIGNMENT
#if defined(__i386) || defined(__i386__) || \
    defined(__x86_64) || defined(__x86_64__) || \
    defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)
/* Formally there are a couple of other architectures that permit
 * unaligned loads, specifically those not crossing cache lines,
 * e.g. IA-64 and PowerPC... */
# undef STRICT_ALIGNMENT
#endif

#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define SMALL_REGISTER_BANK
# if defined(WHIRLPOOL_ASM)
#  ifndef OPENSSL_SMALL_FOOTPRINT
#   define OPENSSL_SMALL_FOOTPRINT /* it appears that for older non-MMX
                                      CPUs this is actually faster! */
#  endif
#  define GO_FOR_MMX(ctx,inp,num) do {                          \
        extern unsigned int OPENSSL_ia32cap_P[];                \
        void whirlpool_block_mmx(void *,const void *,size_t);   \
        if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break;           \
        whirlpool_block_mmx(ctx->H.c,inp,num); return;          \
        } while (0)
# endif
#endif
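
/* Added commentary, not in the original source: word 0 of
 * OPENSSL_ia32cap_P mirrors CPUID.1:EDX, where bit 23 is the MMX
 * feature flag, so GO_FOR_MMX dispatches to the assembler block
 * routine only on MMX-capable CPUs. The do/while(0) wrapper lets
 * "break" fall through to the portable C path, while "return" exits
 * whirlpool_block() once the assembler routine has done the work. */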

#undef ROTATE
#if defined(_MSC_VER)
# if defined(_WIN64) /* applies to both IA-64 and AMD64 */
#  pragma intrinsic(_rotl64)
#  define ROTATE(a,n) _rotl64((a),n)
# endif
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__x86_64) || defined(__x86_64__)
#  if defined(L_ENDIAN)
#   define ROTATE(a,n) ({ u64 ret; asm ("rolq %1,%0"            \
                          : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#  elif defined(B_ENDIAN)
    /* Most will argue that x86_64 is always little-endian. Well,
     * yes, but then there is stratus.com, who modified gcc to
     * "emulate" big-endian on x86. Is there evidence that they
     * [or somebody else] won't do the same for x86_64? Naturally not.
     * So this line stands ready for that brave soul:-) */
#   define ROTATE(a,n) ({ u64 ret; asm ("rorq %1,%0"            \
                          : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#  endif
# elif defined(__ia64) || defined(__ia64__)
#  if defined(L_ENDIAN)
#   define ROTATE(a,n) ({ u64 ret; asm ("shrp %0=%1,%1,%2"      \
                          : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
#  elif defined(B_ENDIAN)
#   define ROTATE(a,n) ({ u64 ret; asm ("shrp %0=%1,%1,%2"      \
                          : "=r"(ret) : "r"(a),"M"(n)); ret; })
#  endif
# endif
#endif

#if defined(OPENSSL_SMALL_FOOTPRINT)
# if !defined(ROTATE)
#  if defined(L_ENDIAN) /* little-endians have to rotate left */
#   define ROTATE(i,n) ((i)<<(n) ^ (i)>>(64-(n)))
#  elif defined(B_ENDIAN) /* big-endians have to rotate right */
#   define ROTATE(i,n) ((i)>>(n) ^ (i)<<(64-(n)))
#  endif
# endif
# if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
#  define STRICT_ALIGNMENT /* ensure smallest table size */
# endif
#endif
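
/* Illustrative note (added, not in the original file): whichever
 * definition is selected, on a little-endian build ROTATE(a,n) is a
 * left rotation by n bits, e.g.
 *
 *     ROTATE((u64)0x0123456789abcdefULL, 8) == 0x23456789abcdef01ULL
 *
 * while big-endian builds define the mirror-image right rotation,
 * which keeps the byte-indexed table lookups below endian-neutral. */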

/*
 * Table size depends on STRICT_ALIGNMENT and on whether an endian-
 * specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
 * defined, which is normally the case on x86[_64] CPUs, the table is
 * 4KB large unconditionally. Otherwise, if ROTATE is defined, the
 * table is 2KB large, and otherwise 16KB. The 2KB table requires a
 * whole bunch of additional rotations, but I'm willing to "trade,"
 * because a 16KB table certainly trashes the L1 cache. I wish all
 * CPUs could handle unaligned loads, as the 4KB table neither
 * trashes the cache nor requires additional rotations.
 */
/*
 * Note that every Cn macro expands as two loads: one byte load and
 * one quadword load. One can argue that so many single-byte loads
 * is excessive, as one could load a quadword and "milk" it for
 * eight 8-bit values instead. Well, yes, but in order to do so *and*
 * avoid excessive loads you have to accommodate a handful of 64-bit
 * values in the register bank and issue a bunch of shifts and masks.
 * It's a trade-off: loads vs. shifts and masks in a big register
 * bank[!]. On most CPUs eight single-byte loads are faster, and I
 * let the others depend on a smart compiler to fold byte loads where
 * beneficial. Hand-coded assembler would be another alternative:-)
 */
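/* Worked sizes (added commentary): the table holds 256*N quadwords
 * plus ROUNDS round constants, i.e. (256*N+10)*8 bytes, so N=1 gives
 * 2128 bytes (~2KB), N=2 gives 4176 (~4KB), and N=8 gives 16464
 * (~16KB), matching the figures quoted above. */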
#ifdef STRICT_ALIGNMENT
# if defined(ROTATE)
#  define N 1
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i) (Cx.q[K.c[(i)*8+0]])
#  define C1(K,i) ROTATE(Cx.q[K.c[(i)*8+1]],8)
#  define C2(K,i) ROTATE(Cx.q[K.c[(i)*8+2]],16)
#  define C3(K,i) ROTATE(Cx.q[K.c[(i)*8+3]],24)
#  define C4(K,i) ROTATE(Cx.q[K.c[(i)*8+4]],32)
#  define C5(K,i) ROTATE(Cx.q[K.c[(i)*8+5]],40)
#  define C6(K,i) ROTATE(Cx.q[K.c[(i)*8+6]],48)
#  define C7(K,i) ROTATE(Cx.q[K.c[(i)*8+7]],56)
# else
#  define N 8
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
                                      c7,c0,c1,c2,c3,c4,c5,c6, \
                                      c6,c7,c0,c1,c2,c3,c4,c5, \
                                      c5,c6,c7,c0,c1,c2,c3,c4, \
                                      c4,c5,c6,c7,c0,c1,c2,c3, \
                                      c3,c4,c5,c6,c7,c0,c1,c2, \
                                      c2,c3,c4,c5,c6,c7,c0,c1, \
                                      c1,c2,c3,c4,c5,c6,c7,c0
#  define C0(K,i) (Cx.q[0+8*K.c[(i)*8+0]])
#  define C1(K,i) (Cx.q[1+8*K.c[(i)*8+1]])
#  define C2(K,i) (Cx.q[2+8*K.c[(i)*8+2]])
#  define C3(K,i) (Cx.q[3+8*K.c[(i)*8+3]])
#  define C4(K,i) (Cx.q[4+8*K.c[(i)*8+4]])
#  define C5(K,i) (Cx.q[5+8*K.c[(i)*8+5]])
#  define C6(K,i) (Cx.q[6+8*K.c[(i)*8+6]])
#  define C7(K,i) (Cx.q[7+8*K.c[(i)*8+7]])
# endif
#else
# define N 2
# define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
                                     c0,c1,c2,c3,c4,c5,c6,c7
# define C0(K,i) (((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
# define C1(K,i) (((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
# define C2(K,i) (((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
# define C3(K,i) (((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
# define C4(K,i) (((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
# define C5(K,i) (((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
# define C6(K,i) (((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
# define C7(K,i) (((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
#endif
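
/* Added commentary, not in the original source: in the 4KB (N=2)
 * case each table entry is stored twice back to back, so the
 * misaligned quadword load in, say,
 *
 *     C1(K,i) == ((u64*)(Cx.c+7))[2*K.c[(i)*8+1]]
 *
 * starts 7 bytes into a duplicated 16-byte pair and thus reads the
 * base entry already rotated by one byte, trading an explicit ROTATE
 * for one unaligned load. The 16KB (N=8) case instead stores all
 * eight byte-rotations of every entry, and the 2KB (N=1) case stores
 * each entry once and rotates at run time. */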

static const
union {
        u8  c[(256*N+ROUNDS)*sizeof(u64)];
        u64 q[256*N+ROUNDS];
        } Cx = { {
        /* Note the endian-neutral representation:-) */
        LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
        LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
        LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
        LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
        LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
        LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
        LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
        LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
        LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
        LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
        LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
        LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
        LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
        LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
        LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
        LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
        LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
        LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
        LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
        LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
        LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
        LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
        LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
        LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
        LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
        LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
        LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
        LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
        LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
        LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
        LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
        LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
        LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
        LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
        LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
        LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
        LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
        LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
        LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
        LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
        LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
        LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
        LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
        LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
        LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
        LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
        LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
        LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
        LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
        LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
        LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
        LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
        LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
        LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
        LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
        LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
        LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
        LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
        LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
        LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
        LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
        LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
        LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
        LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
        LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
        LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
        LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
        LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
        LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
        LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
        LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
        LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
        LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
        LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
        LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
        LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
        LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
        LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
        LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
        LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
        LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
        LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
        LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
        LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
        LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
        LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
        LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
        LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
        LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
        LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
        LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
        LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
        LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
        LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
        LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
        LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
        LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
        LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
        LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
        LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
        LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
        LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
        LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
        LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
        LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
        LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
        LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
        LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
        LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
        LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
        LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
        LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
        LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
        LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
        LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
        LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
        LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
        LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
        LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
        LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
        LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
        LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
        LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
        LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
        LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
        LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
        LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
        LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
        LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
        LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
        LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
        LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
        LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
        LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
        LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
        LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
        LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
        LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
        LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
        LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
        LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
        LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
        LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
        LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
        LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
        LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
        LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
        LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
        LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
        LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
        LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
        LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
        LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
        LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
        LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
        LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
        LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
        LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
        LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
        LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
        LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
        LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
        LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
        LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
        LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
        LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
        LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
        LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
        LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
        LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
        LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
        LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
        LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
        LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
        LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
        LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
        LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
        LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
        LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
        LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
        LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
        LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
        LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
        LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
        LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
        LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
        LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
        LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
        LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
        LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
        LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
        LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
        LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
        LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
        LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
        LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
        LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
        LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
        LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
        LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
        LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
        LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
        LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
        LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
        LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
        LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
        LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
        LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
        LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
        LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
        LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
        LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
        LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
        LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
        LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
        LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
        LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
        LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
        LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
        LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
        LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
        LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
        LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
        LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
        LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
        LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
        LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
        LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
        LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
        LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
        LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
        LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
        LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
        LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
        LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
        LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
        LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
        LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
        LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
        LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
        LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
        LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
        LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
        LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
        LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
        LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
        LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
        LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
        LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
        LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
        LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
        LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
        LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
        LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
        LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
        LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
#define RC (&(Cx.q[256*N]))
        0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f, /* rc[ROUNDS] */
        0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
        0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
        0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
        0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
        0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
        0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
        0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
        0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
        0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
        }
};
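
/* Added commentary, not in the original source: the ten 64-bit round
 * constants live in the tail of Cx, right after the 256*N table
 * entries, which is why RC is defined as &Cx.q[256*N] above. In
 * Whirlpool each round constant is simply eight successive S-box
 * outputs; compare the bytes 0x18,0x23,0xc6,... of rc[0] with the
 * leading bytes of the first eight LL() rows. */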

void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
        {
        int r;
        const u8 *p=inp;
        union { u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;

#ifdef GO_FOR_MMX
        GO_FOR_MMX(ctx,inp,n);
#endif
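
        /* Added commentary: n counts 64-byte input blocks, not bytes.
         * Each iteration XORs one block into the running key K and
         * state S, runs the ROUNDS-round W permutation, and applies
         * the Miyaguchi-Preneel feedforward H ^= S ^ m. */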
        do {
#ifdef OPENSSL_SMALL_FOOTPRINT
        u64 L[8];
        int i;

        for (i=0;i<64;i++) S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
        for (r=0;r<ROUNDS;r++)
                {
                for (i=0;i<8;i++)
                        {
                        L[i] = i ? 0 : RC[r];
                        L[i] ^= C0(K,i) ^ C1(K,(i-1)&7) ^
                                C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
                                C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
                                C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
                        }
                memcpy (K.q,L,64);
                for (i=0;i<8;i++)
                        {
                        L[i] ^= C0(S,i) ^ C1(S,(i-1)&7) ^
                                C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
                                C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
                                C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
                        }
                memcpy (S.q,L,64);
                }
        for (i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i];
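        /* Added commentary: this is the Miyaguchi-Preneel feedforward,
         * H' = W_H(m) ^ H ^ m, folding both the old chaining value and
         * the message block back into the new chaining value. */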
#else
        u64 L0,L1,L2,L3,L4,L5,L6,L7;

#ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7)
                {
                memcpy (S.c,p,64);
                S.q[0] ^= (K.q[0] = H->q[0]);
                S.q[1] ^= (K.q[1] = H->q[1]);
                S.q[2] ^= (K.q[2] = H->q[2]);
                S.q[3] ^= (K.q[3] = H->q[3]);
                S.q[4] ^= (K.q[4] = H->q[4]);
                S.q[5] ^= (K.q[5] = H->q[5]);
                S.q[6] ^= (K.q[6] = H->q[6]);
                S.q[7] ^= (K.q[7] = H->q[7]);
                }
        else
#endif
                {
                const u64 *pa = (const u64*)p;
                S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
                S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
                S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
                S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
                S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
                S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
                S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
                S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
                }
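        /* Added commentary: at this point K holds the old chaining
         * value (the round-0 key) and S = H ^ m is the initial cipher
         * state; each loop iteration below first advances the key
         * schedule (K, seeded with RC[r]) and then applies the keyed
         * round to the state S, all via the table-driven Cn macros. */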

        for(r=0;r<ROUNDS;r++)
                {
#ifdef SMALL_REGISTER_BANK
                L0 =    C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
                        C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
                L1 =    C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
                        C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
                L2 =    C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
                        C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
                L3 =    C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
                        C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
                L4 =    C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
                        C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
                L5 =    C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
                        C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
                L6 =    C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
                        C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
                L7 =    C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
                        C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);

                K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
                K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

                L0 ^=   C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
                        C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
                L1 ^=   C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
                        C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
                L2 ^=   C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
                        C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
                L3 ^=   C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
                        C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
                L4 ^=   C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
                        C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
                L5 ^=   C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
                        C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
                L6 ^=   C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
                        C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
                L7 ^=   C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
                        C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);

                S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
                S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#else
                L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
                L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
                L0 ^= RC[r];

                L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
                L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);

                L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
                L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);

                L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
                L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);

                L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
                L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);

                L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
                L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);

                L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
                L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);

                L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
                L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);

                K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
                K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

                L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
                L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);

                L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
                L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);

                L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
                L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);

                L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
                L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);

                L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
                L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);

                L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
                L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);

                L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
                L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);

                L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
                L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);

                S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
                S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#endif
                }

#ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7)
                {
                int i;
                for(i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i];
                }
        else
#endif
                {
                const u64 *pa=(const u64 *)p;
                H->q[0] ^= S.q[0] ^ pa[0];
                H->q[1] ^= S.q[1] ^ pa[1];
                H->q[2] ^= S.q[2] ^ pa[2];
                H->q[3] ^= S.q[3] ^ pa[3];
                H->q[4] ^= S.q[4] ^ pa[4];
                H->q[5] ^= S.q[5] ^ pa[5];
                H->q[6] ^= S.q[6] ^ pa[6];
                H->q[7] ^= S.q[7] ^ pa[7];
                }
#endif
        p += 64;
        } while(--n);
        }
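
#if 0
/* Hypothetical usage sketch, added here and not part of the original
 * file: whirlpool_block() is normally driven by WHIRLPOOL_Update()/
 * WHIRLPOOL_Final() in wp_dgst.c, which buffer partial input and feed
 * whole 64-byte blocks, along the lines of:
 *
 *     WHIRLPOOL_CTX ctx;
 *     static const unsigned char two_blocks[128] = {0};
 *     WHIRLPOOL_Init(&ctx);
 *     whirlpool_block(&ctx, two_blocks, 2);   (n counts 64-byte blocks)
 *
 * Note that n must be at least 1: the block count is only tested at
 * the bottom of the do/while loop above. */
#endif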