OpenSSL 1.0.1c
gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in
13  * the documentation and/or other materials provided with the
14  * distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  * software must display the following acknowledgment:
18  * "This product includes software developed by the OpenSSL Project
19  * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  * endorse or promote products derived from this software without
23  * prior written permission. For written permission, please contact
24  * openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  * nor may "OpenSSL" appear in their names without prior written
28  * permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  * acknowledgment:
32  * "This product includes software developed by the OpenSSL Project
33  * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49 
50 #define OPENSSL_FIPSAPI
51 
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55 
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 # define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62 
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
65 #undef GETU32
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
67 #undef PUTU32
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
69 #endif
70 
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72 #define REDUCE1BIT(V) do { \
73  if (sizeof(size_t)==8) { \
74  u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75  V.lo = (V.hi<<63)|(V.lo>>1); \
76  V.hi = (V.hi>>1 )^T; \
77  } \
78  else { \
79  u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80  V.lo = (V.hi<<63)|(V.lo>>1); \
81  V.hi = (V.hi>>1 )^((u64)T<<32); \
82  } \
83 } while(0)
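Editor's note: REDUCE1BIT multiplies the field element V by x in GCM's reflected bit order — shift the 128-bit value right by one and, if a bit carried out of the low end, fold it back in with the reduction constant 0xE1. A standalone restatement of the 64-bit branch, as a minimal sketch with hypothetical names and plain stdint types:

#include <stdint.h>

struct u128_sketch { uint64_t hi, lo; };

/* Multiply V by x in GF(2^128) under GCM's reflected bit order:
 * shift right by one; if a bit fell off the low end, XOR the
 * reduction constant 0xE1 into the top byte. Equivalent to the
 * 64-bit branch of REDUCE1BIT above. */
static void gcm_mul_by_x(struct u128_sketch *V)
{
        uint64_t carry = V->lo & 1;
        V->lo = (V->hi << 63) | (V->lo >> 1);
        V->hi = (V->hi >> 1) ^ (carry ? 0xe100000000000000ULL : 0);
}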
84 
85 /*
86  * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
87  * should never be set to 8: 8 is effectively reserved for testing purposes.
88  * TABLE_BITS>1 selects the lookup-table-driven implementations referred to
89  * as "Shoup's" in the GCM specification. In other words OpenSSL does not
90  * cover the whole spectrum of possible table-driven implementations. Why?
91  * In the non-"Shoup's" case the memory access pattern is segmented in such
92  * a manner that it's trivial to see that cache-timing information can
93  * reveal a fair portion of the intermediate hash value. Given that the
94  * ciphertext is always available to an attacker, it's possible to attempt
95  * to deduce the secret parameter H and, if successful, tamper with messages
96  * [which is trivial in CTR mode]. In the "Shoup's" case this is
97  * not as easy, but there is no reason to believe that it's resistant
98  * to cache-timing attacks either. The thing about the "8-bit" implementation
99  * is that it consumes 16 (sixteen) times more memory, 4KB per individual
100  * key + 1KB shared. On the pro side, it should be twice as fast as the
101  * "4-bit" version. For gcc-generated x86[_64] code, the "8-bit" version
102  * was observed to run ~75% faster, closer to 100% for commercial
103  * compilers... Yet the "4-bit" procedure is preferred, because it's
104  * believed to provide a better security-performance balance and adequate
105  * all-round performance. "All-round" refers to things like:
106  *
107  * - shorter setup time effectively improves overall timing for
108  * handling short messages;
109  * - larger table allocation can become unbearable because of VM
110  * subsystem penalties (for example on Windows a large enough free
111  * results in VM working-set trimming, meaning that a subsequent
112  * malloc would immediately incur working-set expansion);
113  * - a larger table has a larger cache footprint, which can affect
114  * the performance of other code paths (not necessarily even from the
115  * same thread in a Hyper-Threading world);
116  *
117  * A value of 1 is not appropriate for performance reasons.
118  */
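Orientation note for the implementations that follow: every TABLE_BITS variant computes the same GHASH recurrence and differs only in how the multiplication by the fixed, key-derived point H is tabled. In comment form:

/*
 * Common to all variants below:
 *     Y_0 = 0,   Y_i = (Y_{i-1} ^ X_i) * H     in GF(2^128),
 * where H = E(K, 0^128) is fixed per key (see CRYPTO_gcm128_init).
 * TABLE_BITS only selects the strategy for multiplying by H.
 */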
119 #if TABLE_BITS==8
120 
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 {
123  int i, j;
124  u128 V;
125 
126  Htable[0].hi = 0;
127  Htable[0].lo = 0;
128  V.hi = H[0];
129  V.lo = H[1];
130 
131  for (Htable[128]=V, i=64; i>0; i>>=1) {
132  REDUCE1BIT(V);
133  Htable[i] = V;
134  }
135 
136  for (i=2; i<256; i<<=1) {
137  u128 *Hi = Htable+i, H0 = *Hi;
138  for (j=1; j<i; ++j) {
139  Hi[j].hi = H0.hi^Htable[j].hi;
140  Hi[j].lo = H0.lo^Htable[j].lo;
141  }
142  }
143 }
144 
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 {
147  u128 Z = { 0, 0};
148  const u8 *xi = (const u8 *)Xi+15;
149  size_t rem, n = *xi;
150  const union { long one; char little; } is_endian = {1};
151  static const size_t rem_8bit[256] = {
152  PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153  PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154  PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155  PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156  PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157  PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158  PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159  PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160  PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161  PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162  PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163  PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164  PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165  PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166  PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167  PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168  PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169  PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170  PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171  PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172  PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173  PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174  PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175  PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176  PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177  PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178  PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179  PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180  PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181  PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182  PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183  PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184  PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185  PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186  PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187  PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188  PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189  PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190  PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191  PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192  PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193  PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194  PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195  PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196  PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197  PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198  PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199  PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200  PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201  PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202  PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203  PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204  PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205  PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206  PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207  PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208  PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209  PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210  PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211  PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212  PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213  PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214  PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215  PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
216 
217  while (1) {
218  Z.hi ^= Htable[n].hi;
219  Z.lo ^= Htable[n].lo;
220 
221  if ((u8 *)Xi==xi) break;
222 
223  n = *(--xi);
224 
225  rem = (size_t)Z.lo&0xff;
226  Z.lo = (Z.hi<<56)|(Z.lo>>8);
227  Z.hi = (Z.hi>>8);
228  if (sizeof(size_t)==8)
229  Z.hi ^= rem_8bit[rem];
230  else
231  Z.hi ^= (u64)rem_8bit[rem]<<32;
232  }
233 
234  if (is_endian.little) {
235 #ifdef BSWAP8
236  Xi[0] = BSWAP8(Z.hi);
237  Xi[1] = BSWAP8(Z.lo);
238 #else
239  u8 *p = (u8 *)Xi;
240  u32 v;
241  v = (u32)(Z.hi>>32); PUTU32(p,v);
242  v = (u32)(Z.hi); PUTU32(p+4,v);
243  v = (u32)(Z.lo>>32); PUTU32(p+8,v);
244  v = (u32)(Z.lo); PUTU32(p+12,v);
245 #endif
246  }
247  else {
248  Xi[0] = Z.hi;
249  Xi[1] = Z.lo;
250  }
251 }
252 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
253 
254 #elif TABLE_BITS==4
255 
256 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
257 {
258  u128 V;
259 #if defined(OPENSSL_SMALL_FOOTPRINT)
260  int i;
261 #endif
262 
263  Htable[0].hi = 0;
264  Htable[0].lo = 0;
265  V.hi = H[0];
266  V.lo = H[1];
267 
268 #if defined(OPENSSL_SMALL_FOOTPRINT)
269  for (Htable[8]=V, i=4; i>0; i>>=1) {
270  REDUCE1BIT(V);
271  Htable[i] = V;
272  }
273 
274  for (i=2; i<16; i<<=1) {
275  u128 *Hi = Htable+i;
276  int j;
277  for (V=*Hi, j=1; j<i; ++j) {
278  Hi[j].hi = V.hi^Htable[j].hi;
279  Hi[j].lo = V.lo^Htable[j].lo;
280  }
281  }
282 #else
283  Htable[8] = V;
284  REDUCE1BIT(V);
285  Htable[4] = V;
286  REDUCE1BIT(V);
287  Htable[2] = V;
288  REDUCE1BIT(V);
289  Htable[1] = V;
290  Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
291  V=Htable[4];
292  Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
293  Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
294  Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
295  V=Htable[8];
296  Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
297  Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298  Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299  Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300  Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301  Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302  Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
303 #endif
304 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
305  /*
306  * ARM assembler expects specific dword order in Htable.
307  */
308  {
309  int j;
310  const union { long one; char little; } is_endian = {1};
311 
312  if (is_endian.little)
313  for (j=0;j<16;++j) {
314  V = Htable[j];
315  Htable[j].hi = V.lo;
316  Htable[j].lo = V.hi;
317  }
318  else
319  for (j=0;j<16;++j) {
320  V = Htable[j];
321  Htable[j].hi = V.lo<<32|V.lo>>32;
322  Htable[j].lo = V.hi<<32|V.hi>>32;
323  }
324  }
325 #endif
326 }
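After gcm_init_4bit, Htable[i] holds i·H in GF(2^128), so every entry is the XOR of the power-of-two entries selected by its index bits. A sanity-check sketch under that assumption (hypothetical helper, not part of the original file; the invariant holds on targets that skip the ARM-specific dword reordering above):

/* Sanity check (illustration only): each Htable entry must equal the
 * XOR of the power-of-two entries selected by its index bits. */
static int gcm_htable_consistent(const u128 Htable[16])
{
        int i;
        for (i = 0; i < 16; ++i) {
                u128 acc = { 0, 0 };
                if (i & 8) { acc.hi ^= Htable[8].hi; acc.lo ^= Htable[8].lo; }
                if (i & 4) { acc.hi ^= Htable[4].hi; acc.lo ^= Htable[4].lo; }
                if (i & 2) { acc.hi ^= Htable[2].hi; acc.lo ^= Htable[2].lo; }
                if (i & 1) { acc.hi ^= Htable[1].hi; acc.lo ^= Htable[1].lo; }
                if (acc.hi != Htable[i].hi || acc.lo != Htable[i].lo)
                        return 0;
        }
        return 1;
}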
327 
328 #ifndef GHASH_ASM
329 static const size_t rem_4bit[16] = {
330  PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331  PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332  PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333  PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
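The rem_4bit constants are linear over GF(2) in the four carry bits: bit b of the index contributes 0x1C20<<b, and PACK() then lifts each 16-bit value into the top bits of size_t. A hypothetical generator that reproduces the table above (illustration only):

/* Rebuild rem_4bit's 16-bit constants from the single generator
 * 0x1C20; the real table stores them PACK()-ed. */
static void make_rem_4bit(unsigned short t[16])
{
        int i, b;
        for (i = 0; i < 16; ++i) {
                unsigned short v = 0;
                for (b = 0; b < 4; ++b)
                        if (i & (1 << b))
                                v ^= (unsigned short)(0x1C20 << b);
                t[i] = v;
        }
}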
334 
335 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
336 {
337  u128 Z;
338  int cnt = 15;
339  size_t rem, nlo, nhi;
340  const union { long one; char little; } is_endian = {1};
341 
342  nlo = ((const u8 *)Xi)[15];
343  nhi = nlo>>4;
344  nlo &= 0xf;
345 
346  Z.hi = Htable[nlo].hi;
347  Z.lo = Htable[nlo].lo;
348 
349  while (1) {
350  rem = (size_t)Z.lo&0xf;
351  Z.lo = (Z.hi<<60)|(Z.lo>>4);
352  Z.hi = (Z.hi>>4);
353  if (sizeof(size_t)==8)
354  Z.hi ^= rem_4bit[rem];
355  else
356  Z.hi ^= (u64)rem_4bit[rem]<<32;
357 
358  Z.hi ^= Htable[nhi].hi;
359  Z.lo ^= Htable[nhi].lo;
360 
361  if (--cnt<0) break;
362 
363  nlo = ((const u8 *)Xi)[cnt];
364  nhi = nlo>>4;
365  nlo &= 0xf;
366 
367  rem = (size_t)Z.lo&0xf;
368  Z.lo = (Z.hi<<60)|(Z.lo>>4);
369  Z.hi = (Z.hi>>4);
370  if (sizeof(size_t)==8)
371  Z.hi ^= rem_4bit[rem];
372  else
373  Z.hi ^= (u64)rem_4bit[rem]<<32;
374 
375  Z.hi ^= Htable[nlo].hi;
376  Z.lo ^= Htable[nlo].lo;
377  }
378 
379  if (is_endian.little) {
380 #ifdef BSWAP8
381  Xi[0] = BSWAP8(Z.hi);
382  Xi[1] = BSWAP8(Z.lo);
383 #else
384  u8 *p = (u8 *)Xi;
385  u32 v;
386  v = (u32)(Z.hi>>32); PUTU32(p,v);
387  v = (u32)(Z.hi); PUTU32(p+4,v);
388  v = (u32)(Z.lo>>32); PUTU32(p+8,v);
389  v = (u32)(Z.lo); PUTU32(p+12,v);
390 #endif
391  }
392  else {
393  Xi[0] = Z.hi;
394  Xi[1] = Z.lo;
395  }
396 }
397 
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
399 /*
400  * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
401  * for details... Compiler-generated code doesn't seem to give any
402  * performance improvement, at least not on x86[_64]. It's here
403  * mostly as a reference and a placeholder for possible future
404  * non-trivial optimization[s]...
405  */
406 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407  const u8 *inp,size_t len)
408 {
409  u128 Z;
410  int cnt;
411  size_t rem, nlo, nhi;
412  const union { long one; char little; } is_endian = {1};
413 
414 #if 1
415  do {
416  cnt = 15;
417  nlo = ((const u8 *)Xi)[15];
418  nlo ^= inp[15];
419  nhi = nlo>>4;
420  nlo &= 0xf;
421 
422  Z.hi = Htable[nlo].hi;
423  Z.lo = Htable[nlo].lo;
424 
425  while (1) {
426  rem = (size_t)Z.lo&0xf;
427  Z.lo = (Z.hi<<60)|(Z.lo>>4);
428  Z.hi = (Z.hi>>4);
429  if (sizeof(size_t)==8)
430  Z.hi ^= rem_4bit[rem];
431  else
432  Z.hi ^= (u64)rem_4bit[rem]<<32;
433 
434  Z.hi ^= Htable[nhi].hi;
435  Z.lo ^= Htable[nhi].lo;
436 
437  if (--cnt<0) break;
438 
439  nlo = ((const u8 *)Xi)[cnt];
440  nlo ^= inp[cnt];
441  nhi = nlo>>4;
442  nlo &= 0xf;
443 
444  rem = (size_t)Z.lo&0xf;
445  Z.lo = (Z.hi<<60)|(Z.lo>>4);
446  Z.hi = (Z.hi>>4);
447  if (sizeof(size_t)==8)
448  Z.hi ^= rem_4bit[rem];
449  else
450  Z.hi ^= (u64)rem_4bit[rem]<<32;
451 
452  Z.hi ^= Htable[nlo].hi;
453  Z.lo ^= Htable[nlo].lo;
454  }
455 #else
456  /*
457  * Extra 256+16 bytes per key plus 512 bytes of shared tables
458  * [should] give ~50% improvement... One could have PACK()-ed
459  * rem_8bit even here, but the priority is to minimize the
460  * cache footprint...
461  */
462  u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463  u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464  static const unsigned short rem_8bit[256] = {
465  0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466  0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467  0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468  0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469  0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470  0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471  0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472  0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473  0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474  0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475  0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476  0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477  0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478  0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479  0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480  0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481  0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482  0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483  0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484  0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485  0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486  0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487  0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488  0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489  0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490  0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491  0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492  0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493  0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494  0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495  0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496  0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
497  /*
498  * This pre-processing phase slows the procedure down by roughly as
499  * much time as it lets each loop iteration gain. In other words,
500  * single-block performance is about the same as in the straightforward
501  * "4-bit" implementation, and beyond one block it only gets faster...
502  */
503  for (cnt=0; cnt<16; ++cnt) {
504  Z.hi = Htable[cnt].hi;
505  Z.lo = Htable[cnt].lo;
506  Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507  Hshr4[cnt].hi = (Z.hi>>4);
508  Hshl4[cnt] = (u8)(Z.lo<<4);
509  }
510 
511  do {
512  for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513  nlo = ((const u8 *)Xi)[cnt];
514  nlo ^= inp[cnt];
515  nhi = nlo>>4;
516  nlo &= 0xf;
517 
518  Z.hi ^= Htable[nlo].hi;
519  Z.lo ^= Htable[nlo].lo;
520 
521  rem = (size_t)Z.lo&0xff;
522 
523  Z.lo = (Z.hi<<56)|(Z.lo>>8);
524  Z.hi = (Z.hi>>8);
525 
526  Z.hi ^= Hshr4[nhi].hi;
527  Z.lo ^= Hshr4[nhi].lo;
528  Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
529  }
530 
531  nlo = ((const u8 *)Xi)[0];
532  nlo ^= inp[0];
533  nhi = nlo>>4;
534  nlo &= 0xf;
535 
536  Z.hi ^= Htable[nlo].hi;
537  Z.lo ^= Htable[nlo].lo;
538 
539  rem = (size_t)Z.lo&0xf;
540 
541  Z.lo = (Z.hi<<60)|(Z.lo>>4);
542  Z.hi = (Z.hi>>4);
543 
544  Z.hi ^= Htable[nhi].hi;
545  Z.lo ^= Htable[nhi].lo;
546  Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
547 #endif
548 
549  if (is_endian.little) {
550 #ifdef BSWAP8
551  Xi[0] = BSWAP8(Z.hi);
552  Xi[1] = BSWAP8(Z.lo);
553 #else
554  u8 *p = (u8 *)Xi;
555  u32 v;
556  v = (u32)(Z.hi>>32); PUTU32(p,v);
557  v = (u32)(Z.hi); PUTU32(p+4,v);
558  v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559  v = (u32)(Z.lo); PUTU32(p+12,v);
560 #endif
561  }
562  else {
563  Xi[0] = Z.hi;
564  Xi[1] = Z.lo;
565  }
566  } while (inp+=16, len-=16);
567 }
568 #endif
569 #else
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
572 #endif
573 
574 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577 /* GHASH_CHUNK is a "stride parameter" intended to mitigate the cache
578  * thrashing effect. In other words, the idea is to hash data while
579  * it's still in the L1 cache after the encryption pass... */
580 #define GHASH_CHUNK (3*1024)
581 #endif
582 
583 #else /* TABLE_BITS */
584 
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
586 {
587  u128 V,Z = { 0,0 };
588  long X;
589  int i,j;
590  const long *xi = (const long *)Xi;
591  const union { long one; char little; } is_endian = {1};
592 
593  V.hi = H[0]; /* H is in host byte order, no byte swapping */
594  V.lo = H[1];
595 
596  for (j=0; j<16/sizeof(long); ++j) {
597  if (is_endian.little) {
598  if (sizeof(long)==8) {
599 #ifdef BSWAP8
600  X = (long)(BSWAP8(xi[j]));
601 #else
602  const u8 *p = (const u8 *)(xi+j);
603  X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
604 #endif
605  }
606  else {
607  const u8 *p = (const u8 *)(xi+j);
608  X = (long)GETU32(p);
609  }
610  }
611  else
612  X = xi[j];
613 
614  for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615  u64 M = (u64)(X>>(8*sizeof(long)-1));
616  Z.hi ^= V.hi&M;
617  Z.lo ^= V.lo&M;
618 
619  REDUCE1BIT(V);
620  }
621  }
622 
623  if (is_endian.little) {
624 #ifdef BSWAP8
625  Xi[0] = BSWAP8(Z.hi);
626  Xi[1] = BSWAP8(Z.lo);
627 #else
628  u8 *p = (u8 *)Xi;
629  u32 v;
630  v = (u32)(Z.hi>>32); PUTU32(p,v);
631  v = (u32)(Z.hi); PUTU32(p+4,v);
632  v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633  v = (u32)(Z.lo); PUTU32(p+12,v);
634 #endif
635  }
636  else {
637  Xi[0] = Z.hi;
638  Xi[1] = Z.lo;
639  }
640 }
641 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
642 
643 #endif
644 
645 #if TABLE_BITS==4 && defined(GHASH_ASM)
646 # if !defined(I386_ONLY) && \
647  (defined(__i386) || defined(__i386__) || \
648  defined(__x86_64) || defined(__x86_64__) || \
649  defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
650 # define GHASH_ASM_X86_OR_64
651 # define GCM_FUNCREF_4BIT
652 extern unsigned int OPENSSL_ia32cap_P[2];
653 
654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
657 
658 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659 # define GHASH_ASM_X86
660 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
661 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
662 
663 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
664 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
665 # endif
666 # elif defined(__arm__) || defined(__arm)
667 # include "arm_arch.h"
668 # if __ARM_ARCH__>=7
669 # define GHASH_ASM_ARM
670 # define GCM_FUNCREF_4BIT
671 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
672 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
673 # endif
674 # endif
675 #endif
676 
677 #ifdef GCM_FUNCREF_4BIT
678 # undef GCM_MUL
679 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
680 # ifdef GHASH
681 # undef GHASH
682 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
683 # endif
684 #endif
685 
686 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
687 {
688  const union { long one; char little; } is_endian = {1};
689 
690  memset(ctx,0,sizeof(*ctx));
691  ctx->block = block;
692  ctx->key = key;
693 
694  (*block)(ctx->H.c,ctx->H.c,key);
695 
696  if (is_endian.little) {
697  /* H is stored in host byte order */
698 #ifdef BSWAP8
699  ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
700  ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
701 #else
702  u8 *p = ctx->H.c;
703  u64 hi,lo;
704  hi = (u64)GETU32(p) <<32|GETU32(p+4);
705  lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
706  ctx->H.u[0] = hi;
707  ctx->H.u[1] = lo;
708 #endif
709  }
710 
711 #if TABLE_BITS==8
712  gcm_init_8bit(ctx->Htable,ctx->H.u);
713 #elif TABLE_BITS==4
714 # if defined(GHASH_ASM_X86_OR_64)
715 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
716  if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
717  OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
718  gcm_init_clmul(ctx->Htable,ctx->H.u);
719  ctx->gmult = gcm_gmult_clmul;
720  ctx->ghash = gcm_ghash_clmul;
721  return;
722  }
723 # endif
724  gcm_init_4bit(ctx->Htable,ctx->H.u);
725 # if defined(GHASH_ASM_X86) /* x86 only */
726 # if defined(OPENSSL_IA32_SSE2)
727  if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
728 # else
729  if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
730 # endif
731  ctx->gmult = gcm_gmult_4bit_mmx;
732  ctx->ghash = gcm_ghash_4bit_mmx;
733  } else {
734  ctx->gmult = gcm_gmult_4bit_x86;
735  ctx->ghash = gcm_ghash_4bit_x86;
736  }
737 # else
738  ctx->gmult = gcm_gmult_4bit;
739  ctx->ghash = gcm_ghash_4bit;
740 # endif
741 # elif defined(GHASH_ASM_ARM)
742  if (OPENSSL_armcap_P & ARMV7_NEON) {
743  ctx->gmult = gcm_gmult_neon;
744  ctx->ghash = gcm_ghash_neon;
745  } else {
746  gcm_init_4bit(ctx->Htable,ctx->H.u);
747  ctx->gmult = gcm_gmult_4bit;
748  ctx->ghash = gcm_ghash_4bit;
749  }
750 # else
751  gcm_init_4bit(ctx->Htable,ctx->H.u);
752 # endif
753 #endif
754 }
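For reference, the intended call sequence of this API, sketched with AES as the block cipher. This mirrors the SELFTEST code near the end of the file; a minimal sketch using the public CRYPTO_gcm128_new/release entry points, with error handling mostly omitted:

#include <openssl/aes.h>
#include <openssl/modes.h>

static int gcm_roundtrip_sketch(const unsigned char key_bytes[16],
                                const unsigned char iv[12],
                                const unsigned char *aad, size_t aad_len,
                                const unsigned char *pt, unsigned char *ct,
                                size_t len, unsigned char tag[16])
{
        AES_KEY aes;
        GCM128_CONTEXT *gcm;

        AES_set_encrypt_key(key_bytes, 128, &aes);
        if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
                return -1;
        CRYPTO_gcm128_setiv(gcm, iv, 12);
        if (aad_len)
                CRYPTO_gcm128_aad(gcm, aad, aad_len);   /* AAD before data */
        CRYPTO_gcm128_encrypt(gcm, pt, ct, len);
        CRYPTO_gcm128_tag(gcm, tag, 16);                /* extract the tag */
        CRYPTO_gcm128_release(gcm);
        return 0;
}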
755 
756 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
757 {
758  const union { long one; char little; } is_endian = {1};
759  unsigned int ctr;
760 #ifdef GCM_FUNCREF_4BIT
761  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
762 #endif
763 
764  ctx->Yi.u[0] = 0;
765  ctx->Yi.u[1] = 0;
766  ctx->Xi.u[0] = 0;
767  ctx->Xi.u[1] = 0;
768  ctx->len.u[0] = 0; /* AAD length */
769  ctx->len.u[1] = 0; /* message length */
770  ctx->ares = 0;
771  ctx->mres = 0;
772 
773  if (len==12) {
774  memcpy(ctx->Yi.c,iv,12);
775  ctx->Yi.c[15]=1;
776  ctr=1;
777  }
778  else {
779  size_t i;
780  u64 len0 = len;
781 
782  while (len>=16) {
783  for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
784  GCM_MUL(ctx,Yi);
785  iv += 16;
786  len -= 16;
787  }
788  if (len) {
789  for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
790  GCM_MUL(ctx,Yi);
791  }
792  len0 <<= 3;
793  if (is_endian.little) {
794 #ifdef BSWAP8
795  ctx->Yi.u[1] ^= BSWAP8(len0);
796 #else
797  ctx->Yi.c[8] ^= (u8)(len0>>56);
798  ctx->Yi.c[9] ^= (u8)(len0>>48);
799  ctx->Yi.c[10] ^= (u8)(len0>>40);
800  ctx->Yi.c[11] ^= (u8)(len0>>32);
801  ctx->Yi.c[12] ^= (u8)(len0>>24);
802  ctx->Yi.c[13] ^= (u8)(len0>>16);
803  ctx->Yi.c[14] ^= (u8)(len0>>8);
804  ctx->Yi.c[15] ^= (u8)(len0);
805 #endif
806  }
807  else
808  ctx->Yi.u[1] ^= len0;
809 
810  GCM_MUL(ctx,Yi);
811 
812  if (is_endian.little)
813  ctr = GETU32(ctx->Yi.c+12);
814  else
815  ctr = ctx->Yi.d[3];
816  }
817 
818  (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
819  ++ctr;
820  if (is_endian.little)
821  PUTU32(ctx->Yi.c+12,ctr);
822  else
823  ctx->Yi.d[3] = ctr;
824 }
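Two IV paths above, per NIST SP 800-38D: a 96-bit IV forms the pre-counter block J0 directly as IV || 0x00000001, while any other length is GHASHed together with its bit length. The 12-byte path restated as a minimal standalone sketch (hypothetical helper name):

#include <string.h>

/* J0 construction for the common 96-bit IV: the IV occupies bytes
 * 0..11 and the big-endian 32-bit block counter starts at 1. */
static void gcm_j0_96bit(unsigned char J0[16], const unsigned char iv[12])
{
        memcpy(J0, iv, 12);
        J0[12] = J0[13] = J0[14] = 0;
        J0[15] = 1;
}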
825 
826 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
827 {
828  size_t i;
829  unsigned int n;
830  u64 alen = ctx->len.u[0];
831 #ifdef GCM_FUNCREF_4BIT
832  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
833 # ifdef GHASH
834  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
835  const u8 *inp,size_t len) = ctx->ghash;
836 # endif
837 #endif
838 
839  if (ctx->len.u[1]) return -2;
840 
841  alen += len;
842  if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
843  return -1;
844  ctx->len.u[0] = alen;
845 
846  n = ctx->ares;
847  if (n) {
848  while (n && len) {
849  ctx->Xi.c[n] ^= *(aad++);
850  --len;
851  n = (n+1)%16;
852  }
853  if (n==0) GCM_MUL(ctx,Xi);
854  else {
855  ctx->ares = n;
856  return 0;
857  }
858  }
859 
860 #ifdef GHASH
861  if ((i = (len&(size_t)-16))) {
862  GHASH(ctx,aad,i);
863  aad += i;
864  len -= i;
865  }
866 #else
867  while (len>=16) {
868  for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
869  GCM_MUL(ctx,Xi);
870  aad += 16;
871  len -= 16;
872  }
873 #endif
874  if (len) {
875  n = (unsigned int)len;
876  for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
877  }
878 
879  ctx->ares = n;
880  return 0;
881 }
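The bounds enforced above track NIST SP 800-38D: AAD is capped near 2^61 bytes (2^64 bits) and, in the encrypt/decrypt routines below, the message near (2^36)-32 bytes (2^39-256 bits). A compile-time restatement of the message cap, as a sketch assuming a C11 compiler:

#include <assert.h>
#include <stdint.h>

/* SP 800-38D: len(P) <= 2^39 - 256 bits, i.e. (2^36)-32 bytes. */
static_assert(((UINT64_C(1) << 36) - 32) * 8 == (UINT64_C(1) << 39) - 256,
              "GCM plaintext cap in bytes matches the spec's bit limit");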
882 
883 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
884  const unsigned char *in, unsigned char *out,
885  size_t len)
886 {
887  const union { long one; char little; } is_endian = {1};
888  unsigned int n, ctr;
889  size_t i;
890  u64 mlen = ctx->len.u[1];
891  block128_f block = ctx->block;
892  void *key = ctx->key;
893 #ifdef GCM_FUNCREF_4BIT
894  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
895 # ifdef GHASH
896  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
897  const u8 *inp,size_t len) = ctx->ghash;
898 # endif
899 #endif
900 
901 #if 0
902  n = (unsigned int)mlen%16; /* alternative to ctx->mres */
903 #endif
904  mlen += len;
905  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
906  return -1;
907  ctx->len.u[1] = mlen;
908 
909  if (ctx->ares) {
910  /* First call to encrypt finalizes GHASH(AAD) */
911  GCM_MUL(ctx,Xi);
912  ctx->ares = 0;
913  }
914 
915  if (is_endian.little)
916  ctr = GETU32(ctx->Yi.c+12);
917  else
918  ctr = ctx->Yi.d[3];
919 
920  n = ctx->mres;
921 #if !defined(OPENSSL_SMALL_FOOTPRINT)
922  if (16%sizeof(size_t) == 0) do { /* always true actually */
923  if (n) {
924  while (n && len) {
925  ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
926  --len;
927  n = (n+1)%16;
928  }
929  if (n==0) GCM_MUL(ctx,Xi);
930  else {
931  ctx->mres = n;
932  return 0;
933  }
934  }
935 #if defined(STRICT_ALIGNMENT)
936  if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
937  break;
938 #endif
939 #if defined(GHASH) && defined(GHASH_CHUNK)
940  while (len>=GHASH_CHUNK) {
941  size_t j=GHASH_CHUNK;
942 
943  while (j) {
944  (*block)(ctx->Yi.c,ctx->EKi.c,key);
945  ++ctr;
946  if (is_endian.little)
947  PUTU32(ctx->Yi.c+12,ctr);
948  else
949  ctx->Yi.d[3] = ctr;
950  for (i=0; i<16; i+=sizeof(size_t))
951  *(size_t *)(out+i) =
952  *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
953  out += 16;
954  in += 16;
955  j -= 16;
956  }
957  GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
958  len -= GHASH_CHUNK;
959  }
960  if ((i = (len&(size_t)-16))) {
961  size_t j=i;
962 
963  while (len>=16) {
964  (*block)(ctx->Yi.c,ctx->EKi.c,key);
965  ++ctr;
966  if (is_endian.little)
967  PUTU32(ctx->Yi.c+12,ctr);
968  else
969  ctx->Yi.d[3] = ctr;
970  for (i=0; i<16; i+=sizeof(size_t))
971  *(size_t *)(out+i) =
972  *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
973  out += 16;
974  in += 16;
975  len -= 16;
976  }
977  GHASH(ctx,out-j,j);
978  }
979 #else
980  while (len>=16) {
981  (*block)(ctx->Yi.c,ctx->EKi.c,key);
982  ++ctr;
983  if (is_endian.little)
984  PUTU32(ctx->Yi.c+12,ctr);
985  else
986  ctx->Yi.d[3] = ctr;
987  for (i=0; i<16; i+=sizeof(size_t))
988  *(size_t *)(ctx->Xi.c+i) ^=
989  *(size_t *)(out+i) =
990  *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
991  GCM_MUL(ctx,Xi);
992  out += 16;
993  in += 16;
994  len -= 16;
995  }
996 #endif
997  if (len) {
998  (*block)(ctx->Yi.c,ctx->EKi.c,key);
999  ++ctr;
1000  if (is_endian.little)
1001  PUTU32(ctx->Yi.c+12,ctr);
1002  else
1003  ctx->Yi.d[3] = ctr;
1004  while (len--) {
1005  ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1006  ++n;
1007  }
1008  }
1009 
1010  ctx->mres = n;
1011  return 0;
1012  } while(0);
1013 #endif
1014  for (i=0;i<len;++i) {
1015  if (n==0) {
1016  (*block)(ctx->Yi.c,ctx->EKi.c,key);
1017  ++ctr;
1018  if (is_endian.little)
1019  PUTU32(ctx->Yi.c+12,ctr);
1020  else
1021  ctx->Yi.d[3] = ctr;
1022  }
1023  ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1024  n = (n+1)%16;
1025  if (n==0)
1026  GCM_MUL(ctx,Xi);
1027  }
1028 
1029  ctx->mres = n;
1030  return 0;
1031 }
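One detail worth calling out in the loops above: GCM increments only the last 32 bits of Yi, big-endian; bytes 0..11 hold the IV-derived part and never receive a carry. A minimal standalone restatement (hypothetical helper):

/* 32-bit big-endian increment of a GCM counter block: only bytes
 * 12..15 participate; a carry out of byte 12 simply wraps. */
static void gcm_ctr32_inc(unsigned char counter[16])
{
        unsigned int n = 16;
        do {
                --n;
                if (++counter[n] != 0)
                        return;         /* stop once no carry remains */
        } while (n > 12);               /* never touch bytes 0..11 */
}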
1032 
1033 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1034  const unsigned char *in, unsigned char *out,
1035  size_t len)
1036 {
1037  const union { long one; char little; } is_endian = {1};
1038  unsigned int n, ctr;
1039  size_t i;
1040  u64 mlen = ctx->len.u[1];
1041  block128_f block = ctx->block;
1042  void *key = ctx->key;
1043 #ifdef GCM_FUNCREF_4BIT
1044  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1045 # ifdef GHASH
1046  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1047  const u8 *inp,size_t len) = ctx->ghash;
1048 # endif
1049 #endif
1050 
1051  mlen += len;
1052  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1053  return -1;
1054  ctx->len.u[1] = mlen;
1055 
1056  if (ctx->ares) {
1057  /* First call to decrypt finalizes GHASH(AAD) */
1058  GCM_MUL(ctx,Xi);
1059  ctx->ares = 0;
1060  }
1061 
1062  if (is_endian.little)
1063  ctr = GETU32(ctx->Yi.c+12);
1064  else
1065  ctr = ctx->Yi.d[3];
1066 
1067  n = ctx->mres;
1068 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1069  if (16%sizeof(size_t) == 0) do { /* always true actually */
1070  if (n) {
1071  while (n && len) {
1072  u8 c = *(in++);
1073  *(out++) = c^ctx->EKi.c[n];
1074  ctx->Xi.c[n] ^= c;
1075  --len;
1076  n = (n+1)%16;
1077  }
1078  if (n==0) GCM_MUL (ctx,Xi);
1079  else {
1080  ctx->mres = n;
1081  return 0;
1082  }
1083  }
1084 #if defined(STRICT_ALIGNMENT)
1085  if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1086  break;
1087 #endif
1088 #if defined(GHASH) && defined(GHASH_CHUNK)
1089  while (len>=GHASH_CHUNK) {
1090  size_t j=GHASH_CHUNK;
1091 
1092  GHASH(ctx,in,GHASH_CHUNK);
1093  while (j) {
1094  (*block)(ctx->Yi.c,ctx->EKi.c,key);
1095  ++ctr;
1096  if (is_endian.little)
1097  PUTU32(ctx->Yi.c+12,ctr);
1098  else
1099  ctx->Yi.d[3] = ctr;
1100  for (i=0; i<16; i+=sizeof(size_t))
1101  *(size_t *)(out+i) =
1102  *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1103  out += 16;
1104  in += 16;
1105  j -= 16;
1106  }
1107  len -= GHASH_CHUNK;
1108  }
1109  if ((i = (len&(size_t)-16))) {
1110  GHASH(ctx,in,i);
1111  while (len>=16) {
1112  (*block)(ctx->Yi.c,ctx->EKi.c,key);
1113  ++ctr;
1114  if (is_endian.little)
1115  PUTU32(ctx->Yi.c+12,ctr);
1116  else
1117  ctx->Yi.d[3] = ctr;
1118  for (i=0; i<16; i+=sizeof(size_t))
1119  *(size_t *)(out+i) =
1120  *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1121  out += 16;
1122  in += 16;
1123  len -= 16;
1124  }
1125  }
1126 #else
1127  while (len>=16) {
1128  (*block)(ctx->Yi.c,ctx->EKi.c,key);
1129  ++ctr;
1130  if (is_endian.little)
1131  PUTU32(ctx->Yi.c+12,ctr);
1132  else
1133  ctx->Yi.d[3] = ctr;
1134  for (i=0; i<16; i+=sizeof(size_t)) {
1135  size_t c = *(size_t *)(in+i);
1136  *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1137  *(size_t *)(ctx->Xi.c+i) ^= c;
1138  }
1139  GCM_MUL(ctx,Xi);
1140  out += 16;
1141  in += 16;
1142  len -= 16;
1143  }
1144 #endif
1145  if (len) {
1146  (*block)(ctx->Yi.c,ctx->EKi.c,key);
1147  ++ctr;
1148  if (is_endian.little)
1149  PUTU32(ctx->Yi.c+12,ctr);
1150  else
1151  ctx->Yi.d[3] = ctr;
1152  while (len--) {
1153  u8 c = in[n];
1154  ctx->Xi.c[n] ^= c;
1155  out[n] = c^ctx->EKi.c[n];
1156  ++n;
1157  }
1158  }
1159 
1160  ctx->mres = n;
1161  return 0;
1162  } while(0);
1163 #endif
1164  for (i=0;i<len;++i) {
1165  u8 c;
1166  if (n==0) {
1167  (*block)(ctx->Yi.c,ctx->EKi.c,key);
1168  ++ctr;
1169  if (is_endian.little)
1170  PUTU32(ctx->Yi.c+12,ctr);
1171  else
1172  ctx->Yi.d[3] = ctr;
1173  }
1174  c = in[i];
1175  out[i] = c^ctx->EKi.c[n];
1176  ctx->Xi.c[n] ^= c;
1177  n = (n+1)%16;
1178  if (n==0)
1179  GCM_MUL(ctx,Xi);
1180  }
1181 
1182  ctx->mres = n;
1183  return 0;
1184 }
1185 
1186 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1187  const unsigned char *in, unsigned char *out,
1188  size_t len, ctr128_f stream)
1189 {
1190  const union { long one; char little; } is_endian = {1};
1191  unsigned int n, ctr;
1192  size_t i;
1193  u64 mlen = ctx->len.u[1];
1194  void *key = ctx->key;
1195 #ifdef GCM_FUNCREF_4BIT
1196  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1197 # ifdef GHASH
1198  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1199  const u8 *inp,size_t len) = ctx->ghash;
1200 # endif
1201 #endif
1202 
1203  mlen += len;
1204  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1205  return -1;
1206  ctx->len.u[1] = mlen;
1207 
1208  if (ctx->ares) {
1209  /* First call to encrypt finalizes GHASH(AAD) */
1210  GCM_MUL(ctx,Xi);
1211  ctx->ares = 0;
1212  }
1213 
1214  if (is_endian.little)
1215  ctr = GETU32(ctx->Yi.c+12);
1216  else
1217  ctr = ctx->Yi.d[3];
1218 
1219  n = ctx->mres;
1220  if (n) {
1221  while (n && len) {
1222  ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1223  --len;
1224  n = (n+1)%16;
1225  }
1226  if (n==0) GCM_MUL(ctx,Xi);
1227  else {
1228  ctx->mres = n;
1229  return 0;
1230  }
1231  }
1232 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1233  while (len>=GHASH_CHUNK) {
1234  (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1235  ctr += GHASH_CHUNK/16;
1236  if (is_endian.little)
1237  PUTU32(ctx->Yi.c+12,ctr);
1238  else
1239  ctx->Yi.d[3] = ctr;
1240  GHASH(ctx,out,GHASH_CHUNK);
1241  out += GHASH_CHUNK;
1242  in += GHASH_CHUNK;
1243  len -= GHASH_CHUNK;
1244  }
1245 #endif
1246  if ((i = (len&(size_t)-16))) {
1247  size_t j=i/16;
1248 
1249  (*stream)(in,out,j,key,ctx->Yi.c);
1250  ctr += (unsigned int)j;
1251  if (is_endian.little)
1252  PUTU32(ctx->Yi.c+12,ctr);
1253  else
1254  ctx->Yi.d[3] = ctr;
1255  in += i;
1256  len -= i;
1257 #if defined(GHASH)
1258  GHASH(ctx,out,i);
1259  out += i;
1260 #else
1261  while (j--) {
1262  for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1263  GCM_MUL(ctx,Xi);
1264  out += 16;
1265  }
1266 #endif
1267  }
1268  if (len) {
1269  (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1270  ++ctr;
1271  if (is_endian.little)
1272  PUTU32(ctx->Yi.c+12,ctr);
1273  else
1274  ctx->Yi.d[3] = ctr;
1275  while (len--) {
1276  ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1277  ++n;
1278  }
1279  }
1280 
1281  ctx->mres = n;
1282  return 0;
1283 }
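The ctr128_f `stream` argument is expected to encrypt whole 16-byte blocks while performing the same 32-bit counter increment; its real signature (from modes.h) takes in/out pointers, a block count, the opaque key object and the 16-byte counter, and the GCM code above rewrites Yi itself afterwards. A sketch of that contract, with the block cipher passed explicitly for illustration rather than bound inside the key object as a real implementation would do:

/* Illustration of the ctr32 stream contract; not the exact ctr128_f
 * signature because of the extra `block` parameter. */
static void ctr32_stream_sketch(const unsigned char *in, unsigned char *out,
                                size_t blocks, const void *key,
                                unsigned char ivec[16], block128_f block)
{
        unsigned char ks[16];
        size_t i;

        while (blocks--) {
                (*block)(ivec, ks, key);
                for (i = 0; i < 16; ++i)
                        *out++ = *in++ ^ ks[i];
                for (i = 16; i-- > 12;)         /* 32-bit big-endian ++ */
                        if (++ivec[i] != 0)
                                break;
        }
}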
1284 
1285 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1286  const unsigned char *in, unsigned char *out,
1287  size_t len,ctr128_f stream)
1288 {
1289  const union { long one; char little; } is_endian = {1};
1290  unsigned int n, ctr;
1291  size_t i;
1292  u64 mlen = ctx->len.u[1];
1293  void *key = ctx->key;
1294 #ifdef GCM_FUNCREF_4BIT
1295  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1296 # ifdef GHASH
1297  void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1298  const u8 *inp,size_t len) = ctx->ghash;
1299 # endif
1300 #endif
1301 
1302  mlen += len;
1303  if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1304  return -1;
1305  ctx->len.u[1] = mlen;
1306 
1307  if (ctx->ares) {
1308  /* First call to decrypt finalizes GHASH(AAD) */
1309  GCM_MUL(ctx,Xi);
1310  ctx->ares = 0;
1311  }
1312 
1313  if (is_endian.little)
1314  ctr = GETU32(ctx->Yi.c+12);
1315  else
1316  ctr = ctx->Yi.d[3];
1317 
1318  n = ctx->mres;
1319  if (n) {
1320  while (n && len) {
1321  u8 c = *(in++);
1322  *(out++) = c^ctx->EKi.c[n];
1323  ctx->Xi.c[n] ^= c;
1324  --len;
1325  n = (n+1)%16;
1326  }
1327  if (n==0) GCM_MUL (ctx,Xi);
1328  else {
1329  ctx->mres = n;
1330  return 0;
1331  }
1332  }
1333 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1334  while (len>=GHASH_CHUNK) {
1335  GHASH(ctx,in,GHASH_CHUNK);
1336  (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1337  ctr += GHASH_CHUNK/16;
1338  if (is_endian.little)
1339  PUTU32(ctx->Yi.c+12,ctr);
1340  else
1341  ctx->Yi.d[3] = ctr;
1342  out += GHASH_CHUNK;
1343  in += GHASH_CHUNK;
1344  len -= GHASH_CHUNK;
1345  }
1346 #endif
1347  if ((i = (len&(size_t)-16))) {
1348  size_t j=i/16;
1349 
1350 #if defined(GHASH)
1351  GHASH(ctx,in,i);
1352 #else
1353  while (j--) {
1354  size_t k;
1355  for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1356  GCM_MUL(ctx,Xi);
1357  in += 16;
1358  }
1359  j = i/16;
1360  in -= i;
1361 #endif
1362  (*stream)(in,out,j,key,ctx->Yi.c);
1363  ctr += (unsigned int)j;
1364  if (is_endian.little)
1365  PUTU32(ctx->Yi.c+12,ctr);
1366  else
1367  ctx->Yi.d[3] = ctr;
1368  out += i;
1369  in += i;
1370  len -= i;
1371  }
1372  if (len) {
1373  (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1374  ++ctr;
1375  if (is_endian.little)
1376  PUTU32(ctx->Yi.c+12,ctr);
1377  else
1378  ctx->Yi.d[3] = ctr;
1379  while (len--) {
1380  u8 c = in[n];
1381  ctx->Xi.c[n] ^= c;
1382  out[n] = c^ctx->EKi.c[n];
1383  ++n;
1384  }
1385  }
1386 
1387  ctx->mres = n;
1388  return 0;
1389 }
1390 
1391 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1392  size_t len)
1393 {
1394  const union { long one; char little; } is_endian = {1};
1395  u64 alen = ctx->len.u[0]<<3;
1396  u64 clen = ctx->len.u[1]<<3;
1397 #ifdef GCM_FUNCREF_4BIT
1398  void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1399 #endif
1400 
1401  if (ctx->mres)
1402  GCM_MUL(ctx,Xi);
1403 
1404  if (is_endian.little) {
1405 #ifdef BSWAP8
1406  alen = BSWAP8(alen);
1407  clen = BSWAP8(clen);
1408 #else
1409  u8 *p = ctx->len.c;
1410 
1411  ctx->len.u[0] = alen;
1412  ctx->len.u[1] = clen;
1413 
1414  alen = (u64)GETU32(p) <<32|GETU32(p+4);
1415  clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1416 #endif
1417  }
1418 
1419  ctx->Xi.u[0] ^= alen;
1420  ctx->Xi.u[1] ^= clen;
1421  GCM_MUL(ctx,Xi);
1422 
1423  ctx->Xi.u[0] ^= ctx->EK0.u[0];
1424  ctx->Xi.u[1] ^= ctx->EK0.u[1];
1425 
1426  if (tag && len<=sizeof(ctx->Xi))
1427  return memcmp(ctx->Xi.c,tag,len);
1428  else
1429  return -1;
1430 }
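One editorial caution here: memcmp returns early at the first mismatching byte, so the tag comparison above leaks timing information about how many leading tag bytes were correct; later OpenSSL releases switch this comparison to the constant-time CRYPTO_memcmp. A constant-time check looks like this sketch:

#include <stddef.h>

/* Constant-time tag comparison: accumulate all byte differences
 * instead of returning at the first mismatch. */
static int gcm_tag_equal(const unsigned char *a, const unsigned char *b,
                         size_t len)
{
        unsigned char diff = 0;
        size_t i;

        for (i = 0; i < len; ++i)
                diff |= a[i] ^ b[i];
        return diff == 0;
}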
1431 
1432 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1433 {
1434  CRYPTO_gcm128_finish(ctx, NULL, 0);
1435  memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1436 }
1437 
1438 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1439 {
1440  GCM128_CONTEXT *ret;
1441 
1442  if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1443  CRYPTO_gcm128_init(ret,key,block);
1444 
1445  return ret;
1446 }
1447 
1448 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1449 {
1450  if (ctx) {
1451  OPENSSL_cleanse(ctx,sizeof(*ctx));
1452  OPENSSL_free(ctx);
1453  }
1454 }
1455 
1456 #if defined(SELFTEST)
1457 #include <stdio.h>
1458 #include <openssl/aes.h>
1459 
1460 /* Test Case 1 */
1461 static const u8 K1[16],
1462  *P1=NULL,
1463  *A1=NULL,
1464  IV1[12],
1465  *C1=NULL,
1466  T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1467 
1468 /* Test Case 2 */
1469 #define K2 K1
1470 #define A2 A1
1471 #define IV2 IV1
1472 static const u8 P2[16],
1473  C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1474  T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1475 
1476 /* Test Case 3 */
1477 #define A3 A2
1478 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1479  P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1480  0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1481  0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1482  0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1483  IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1484  C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1485  0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1486  0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1487  0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1488  T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1489 
1490 /* Test Case 4 */
1491 #define K4 K3
1492 #define IV4 IV3
1493 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1494  0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1495  0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1496  0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1497  A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1498  0xab,0xad,0xda,0xd2},
1499  C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1500  0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1501  0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1502  0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1503  T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1504 
1505 /* Test Case 5 */
1506 #define K5 K4
1507 #define P5 P4
1508 #define A5 A4
1509 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1510  C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1511  0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1512  0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1513  0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1514  T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1515 
1516 /* Test Case 6 */
1517 #define K6 K5
1518 #define P6 P5
1519 #define A6 A5
1520 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1521  0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1522  0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1523  0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1524  C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1525  0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1526  0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1527  0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1528  T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1529 
1530 /* Test Case 7 */
1531 static const u8 K7[24],
1532  *P7=NULL,
1533  *A7=NULL,
1534  IV7[12],
1535  *C7=NULL,
1536  T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1537 
1538 /* Test Case 8 */
1539 #define K8 K7
1540 #define IV8 IV7
1541 #define A8 A7
1542 static const u8 P8[16],
1543  C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1544  T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1545 
1546 /* Test Case 9 */
1547 #define A9 A8
1548 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1549  0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1550  P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1551  0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1552  0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1553  0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1554  IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1555  C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1556  0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1557  0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1558  0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1559  T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1560 
1561 /* Test Case 10 */
1562 #define K10 K9
1563 #define IV10 IV9
1564 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1565  0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1566  0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1567  0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1568  A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1569  0xab,0xad,0xda,0xd2},
1570  C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1571  0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1572  0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1573  0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1574  T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1575 
1576 /* Test Case 11 */
1577 #define K11 K10
1578 #define P11 P10
1579 #define A11 A10
1580 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1581  C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1582  0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1583  0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1584  0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1585  T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1586 
1587 /* Test Case 12 */
1588 #define K12 K11
1589 #define P12 P11
1590 #define A12 A11
1591 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1592  0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1593  0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1594  0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1595  C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1596  0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1597  0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1598  0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1599  T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1600 
1601 /* Test Case 13 */
1602 static const u8 K13[32],
1603  *P13=NULL,
1604  *A13=NULL,
1605  IV13[12],
1606  *C13=NULL,
1607  T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1608 
1609 /* Test Case 14 */
1610 #define K14 K13
1611 #define A14 A13
1612 static const u8 P14[16],
1613  IV14[12],
1614  C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1615  T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1616 
1617 /* Test Case 15 */
1618 #define A15 A14
1619 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1620  0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1621  P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1622  0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1623  0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1624  0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1625  IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1626  C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1627  0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1628  0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1629  0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1630  T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1631 
1632 /* Test Case 16 */
1633 #define K16 K15
1634 #define IV16 IV15
1635 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1636  0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1637  0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1638  0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1639  A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1640  0xab,0xad,0xda,0xd2},
1641  C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1642  0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1643  0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1644  0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1645  T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1646 
1647 /* Test Case 17 */
1648 #define K17 K16
1649 #define P17 P16
1650 #define A17 A16
1651 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1652  C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1653  0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1654  0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1655  0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1656  T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1657 
1658 /* Test Case 18 */
1659 #define K18 K17
1660 #define P18 P17
1661 #define A18 A17
1662 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1663  0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1664  0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1665  0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1666  C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1667  0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1668  0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1669  0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1670  T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1671 
1672 #define TEST_CASE(n) do { \
1673  u8 out[sizeof(P##n)]; \
1674  AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1675  CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1676  CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1677  memset(out,0,sizeof(out)); \
1678  if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1679  if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1680  if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1681  (C##n && memcmp(out,C##n,sizeof(out)))) \
1682  ret++, printf ("encrypt test#%d failed.\n",n); \
1683  CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1684  memset(out,0,sizeof(out)); \
1685  if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1686  if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1687  if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1688  (P##n && memcmp(out,P##n,sizeof(out)))) \
1689  ret++, printf ("decrypt test#%d failed.\n",n); \
1690  } while(0)
1691 
1692 int main()
1693 {
1694  GCM128_CONTEXT ctx;
1695  AES_KEY key;
1696  int ret=0;
1697 
1698  TEST_CASE(1);
1699  TEST_CASE(2);
1700  TEST_CASE(3);
1701  TEST_CASE(4);
1702  TEST_CASE(5);
1703  TEST_CASE(6);
1704  TEST_CASE(7);
1705  TEST_CASE(8);
1706  TEST_CASE(9);
1707  TEST_CASE(10);
1708  TEST_CASE(11);
1709  TEST_CASE(12);
1710  TEST_CASE(13);
1711  TEST_CASE(14);
1712  TEST_CASE(15);
1713  TEST_CASE(16);
1714  TEST_CASE(17);
1715  TEST_CASE(18);
1716 
1717 #ifdef OPENSSL_CPUID_OBJ
1718  {
1719  size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1720  union { u64 u; u8 c[1024]; } buf;
1721  int i;
1722 
1723  AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1724  CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1725  CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1726 
1727  CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1728  start = OPENSSL_rdtsc();
1729  CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1730  gcm_t = OPENSSL_rdtsc() - start;
1731 
1732  CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1733  &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1734  (block128_f)AES_encrypt);
1735  start = OPENSSL_rdtsc();
1736  CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1737  &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1738  (block128_f)AES_encrypt);
1739  ctr_t = OPENSSL_rdtsc() - start;
1740 
1741  printf("%.2f-%.2f=%.2f\n",
1742  gcm_t/(double)sizeof(buf),
1743  ctr_t/(double)sizeof(buf),
1744  (gcm_t-ctr_t)/(double)sizeof(buf));
1745 #ifdef GHASH
1746  GHASH(&ctx,buf.c,sizeof(buf));
1747  start = OPENSSL_rdtsc();
1748  for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1749  gcm_t = OPENSSL_rdtsc() - start;
1750  printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1751 #endif
1752  }
1753 #endif
1754 
1755  return ret;
1756 }
1757 #endif