Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
gf128mul.c
Go to the documentation of this file.
1 /* gf128mul.c - GF(2^128) multiplication functions
2  *
3  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
4  * Copyright (c) 2006, Rik Snel <[email protected]>
5  *
6  * Based on Dr Brian Gladman's (GPL'd) work published at
7  * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
8  * See the original copyright notice below.
9  *
10  * This program is free software; you can redistribute it and/or modify it
11  * under the terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 2 of the License, or (at your option)
13  * any later version.
14  */
15 
16 /*
17  ---------------------------------------------------------------------------
18  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
19 
20  LICENSE TERMS
21 
22  The free distribution and use of this software in both source and binary
23  form is allowed (with or without changes) provided that:
24 
25  1. distributions of this source code include the above copyright
26  notice, this list of conditions and the following disclaimer;
27 
28  2. distributions in binary form include the above copyright
29  notice, this list of conditions and the following disclaimer
30  in the documentation and/or other associated materials;
31 
32  3. the copyright holder's name is not used to endorse products
33  built using this software without specific written permission.
34 
35  ALTERNATIVELY, provided that this notice is retained in full, this product
36  may be distributed under the terms of the GNU General Public License (GPL),
37  in which case the provisions of the GPL apply INSTEAD OF those given above.
38 
39  DISCLAIMER
40 
41  This software is provided 'as is' with no explicit or implied warranties
42  in respect of its properties, including, but not limited to, correctness
43  and/or fitness for purpose.
44  ---------------------------------------------------------------------------
45  Issue 31/01/2006
46 
47  This file provides fast multiplication in GF(128) as required by several
48  cryptographic authentication modes
49 */
50 
51 #include <crypto/gf128mul.h>
52 #include <linux/kernel.h>
53 #include <linux/module.h>
54 #include <linux/slab.h>
55 
56 #define gf128mul_dat(q) { \
57  q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
58  q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
59  q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
60  q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
61  q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
62  q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
63  q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
64  q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
65  q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
66  q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
67  q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
68  q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
69  q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
70  q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
71  q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
72  q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
73  q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
74  q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
75  q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
76  q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
77  q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
78  q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
79  q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
80  q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
81  q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
82  q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
83  q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
84  q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
85  q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
86  q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
87  q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
88  q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
89 }
90 
91 /* Given the value i in 0..255 as the byte overflow when a field element
92  in GHASH is multiplied by x^8, this function will return the values that
93  are generated in the lo 16-bit word of the field value by applying the
94  modular polynomial. The values lo_byte and hi_byte are returned via the
95  macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
96  memory as required by a suitable definition of this macro operating on
97  the table above
98 */
99 
100 #define xx(p, q) 0x##p##q
101 
102 #define xda_bbe(i) ( \
103  (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
104  (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
105  (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
106  (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
107 )
108 
109 #define xda_lle(i) ( \
110  (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
111  (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
112  (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
113  (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
114 )
115 
116 static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
117 static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
118 
119 /* These functions multiply a field element by x, by x^4 and by x^8
120  * in the polynomial field representation. It uses 32-bit word operations
121  * to gain speed but compensates for machine endianess and hence works
122  * correctly on both styles of machine.
123  */
124 
125 static void gf128mul_x_lle(be128 *r, const be128 *x)
126 {
127  u64 a = be64_to_cpu(x->a);
128  u64 b = be64_to_cpu(x->b);
129  u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
130 
131  r->b = cpu_to_be64((b >> 1) | (a << 63));
132  r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
133 }
134 
135 static void gf128mul_x_bbe(be128 *r, const be128 *x)
136 {
137  u64 a = be64_to_cpu(x->a);
138  u64 b = be64_to_cpu(x->b);
139  u64 _tt = gf128mul_table_bbe[a >> 63];
140 
141  r->a = cpu_to_be64((a << 1) | (b >> 63));
142  r->b = cpu_to_be64((b << 1) ^ _tt);
143 }
144 
145 void gf128mul_x_ble(be128 *r, const be128 *x)
146 {
147  u64 a = le64_to_cpu(x->a);
148  u64 b = le64_to_cpu(x->b);
149  u64 _tt = gf128mul_table_bbe[b >> 63];
150 
151  r->a = cpu_to_le64((a << 1) ^ _tt);
152  r->b = cpu_to_le64((b << 1) | (a >> 63));
153 }
155 
156 static void gf128mul_x8_lle(be128 *x)
157 {
158  u64 a = be64_to_cpu(x->a);
159  u64 b = be64_to_cpu(x->b);
160  u64 _tt = gf128mul_table_lle[b & 0xff];
161 
162  x->b = cpu_to_be64((b >> 8) | (a << 56));
163  x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
164 }
165 
166 static void gf128mul_x8_bbe(be128 *x)
167 {
168  u64 a = be64_to_cpu(x->a);
169  u64 b = be64_to_cpu(x->b);
170  u64 _tt = gf128mul_table_bbe[a >> 56];
171 
172  x->a = cpu_to_be64((a << 8) | (b >> 56));
173  x->b = cpu_to_be64((b << 8) ^ _tt);
174 }
175 
176 void gf128mul_lle(be128 *r, const be128 *b)
177 {
178  be128 p[8];
179  int i;
180 
181  p[0] = *r;
182  for (i = 0; i < 7; ++i)
183  gf128mul_x_lle(&p[i + 1], &p[i]);
184 
185  memset(r, 0, sizeof(*r));
186  for (i = 0;;) {
187  u8 ch = ((u8 *)b)[15 - i];
188 
189  if (ch & 0x80)
190  be128_xor(r, r, &p[0]);
191  if (ch & 0x40)
192  be128_xor(r, r, &p[1]);
193  if (ch & 0x20)
194  be128_xor(r, r, &p[2]);
195  if (ch & 0x10)
196  be128_xor(r, r, &p[3]);
197  if (ch & 0x08)
198  be128_xor(r, r, &p[4]);
199  if (ch & 0x04)
200  be128_xor(r, r, &p[5]);
201  if (ch & 0x02)
202  be128_xor(r, r, &p[6]);
203  if (ch & 0x01)
204  be128_xor(r, r, &p[7]);
205 
206  if (++i >= 16)
207  break;
208 
209  gf128mul_x8_lle(r);
210  }
211 }
213 
214 void gf128mul_bbe(be128 *r, const be128 *b)
215 {
216  be128 p[8];
217  int i;
218 
219  p[0] = *r;
220  for (i = 0; i < 7; ++i)
221  gf128mul_x_bbe(&p[i + 1], &p[i]);
222 
223  memset(r, 0, sizeof(*r));
224  for (i = 0;;) {
225  u8 ch = ((u8 *)b)[i];
226 
227  if (ch & 0x80)
228  be128_xor(r, r, &p[7]);
229  if (ch & 0x40)
230  be128_xor(r, r, &p[6]);
231  if (ch & 0x20)
232  be128_xor(r, r, &p[5]);
233  if (ch & 0x10)
234  be128_xor(r, r, &p[4]);
235  if (ch & 0x08)
236  be128_xor(r, r, &p[3]);
237  if (ch & 0x04)
238  be128_xor(r, r, &p[2]);
239  if (ch & 0x02)
240  be128_xor(r, r, &p[1]);
241  if (ch & 0x01)
242  be128_xor(r, r, &p[0]);
243 
244  if (++i >= 16)
245  break;
246 
247  gf128mul_x8_bbe(r);
248  }
249 }
251 
252 /* This version uses 64k bytes of table space.
253  A 16 byte buffer has to be multiplied by a 16 byte key
254  value in GF(128). If we consider a GF(128) value in
255  the buffer's lowest byte, we can construct a table of
256  the 256 16 byte values that result from the 256 values
257  of this byte. This requires 4096 bytes. But we also
258  need tables for each of the 16 higher bytes in the
259  buffer as well, which makes 64 kbytes in total.
260 */
261 /* additional explanation
262  * t[0][BYTE] contains g*BYTE
263  * t[1][BYTE] contains g*x^8*BYTE
264  * ..
265  * t[15][BYTE] contains g*x^120*BYTE */
267 {
268  struct gf128mul_64k *t;
269  int i, j, k;
270 
271  t = kzalloc(sizeof(*t), GFP_KERNEL);
272  if (!t)
273  goto out;
274 
275  for (i = 0; i < 16; i++) {
276  t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
277  if (!t->t[i]) {
279  t = NULL;
280  goto out;
281  }
282  }
283 
284  t->t[0]->t[128] = *g;
285  for (j = 64; j > 0; j >>= 1)
286  gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
287 
288  for (i = 0;;) {
289  for (j = 2; j < 256; j += j)
290  for (k = 1; k < j; ++k)
291  be128_xor(&t->t[i]->t[j + k],
292  &t->t[i]->t[j], &t->t[i]->t[k]);
293 
294  if (++i >= 16)
295  break;
296 
297  for (j = 128; j > 0; j >>= 1) {
298  t->t[i]->t[j] = t->t[i - 1]->t[j];
299  gf128mul_x8_lle(&t->t[i]->t[j]);
300  }
301  }
302 
303 out:
304  return t;
305 }
307 
309 {
310  struct gf128mul_64k *t;
311  int i, j, k;
312 
313  t = kzalloc(sizeof(*t), GFP_KERNEL);
314  if (!t)
315  goto out;
316 
317  for (i = 0; i < 16; i++) {
318  t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
319  if (!t->t[i]) {
321  t = NULL;
322  goto out;
323  }
324  }
325 
326  t->t[0]->t[1] = *g;
327  for (j = 1; j <= 64; j <<= 1)
328  gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
329 
330  for (i = 0;;) {
331  for (j = 2; j < 256; j += j)
332  for (k = 1; k < j; ++k)
333  be128_xor(&t->t[i]->t[j + k],
334  &t->t[i]->t[j], &t->t[i]->t[k]);
335 
336  if (++i >= 16)
337  break;
338 
339  for (j = 128; j > 0; j >>= 1) {
340  t->t[i]->t[j] = t->t[i - 1]->t[j];
341  gf128mul_x8_bbe(&t->t[i]->t[j]);
342  }
343  }
344 
345 out:
346  return t;
347 }
349 
351 {
352  int i;
353 
354  for (i = 0; i < 16; i++)
355  kfree(t->t[i]);
356  kfree(t);
357 }
359 
361 {
362  u8 *ap = (u8 *)a;
363  be128 r[1];
364  int i;
365 
366  *r = t->t[0]->t[ap[0]];
367  for (i = 1; i < 16; ++i)
368  be128_xor(r, r, &t->t[i]->t[ap[i]]);
369  *a = *r;
370 }
372 
374 {
375  u8 *ap = (u8 *)a;
376  be128 r[1];
377  int i;
378 
379  *r = t->t[0]->t[ap[15]];
380  for (i = 1; i < 16; ++i)
381  be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
382  *a = *r;
383 }
385 
386 /* This version uses 4k bytes of table space.
387  A 16 byte buffer has to be multiplied by a 16 byte key
388  value in GF(128). If we consider a GF(128) value in a
389  single byte, we can construct a table of the 256 16 byte
390  values that result from the 256 values of this byte.
391  This requires 4096 bytes. If we take the highest byte in
392  the buffer and use this table to get the result, we then
393  have to multiply by x^120 to get the final value. For the
394  next highest byte the result has to be multiplied by x^112
395  and so on. But we can do this by accumulating the result
396  in an accumulator starting with the result for the top
397  byte. We repeatedly multiply the accumulator value by
398  x^8 and then add in (i.e. xor) the 16 bytes of the next
399  lower byte in the buffer, stopping when we reach the
400  lowest byte. This requires a 4096 byte table.
401 */
403 {
404  struct gf128mul_4k *t;
405  int j, k;
406 
407  t = kzalloc(sizeof(*t), GFP_KERNEL);
408  if (!t)
409  goto out;
410 
411  t->t[128] = *g;
412  for (j = 64; j > 0; j >>= 1)
413  gf128mul_x_lle(&t->t[j], &t->t[j+j]);
414 
415  for (j = 2; j < 256; j += j)
416  for (k = 1; k < j; ++k)
417  be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
418 
419 out:
420  return t;
421 }
423 
425 {
426  struct gf128mul_4k *t;
427  int j, k;
428 
429  t = kzalloc(sizeof(*t), GFP_KERNEL);
430  if (!t)
431  goto out;
432 
433  t->t[1] = *g;
434  for (j = 1; j <= 64; j <<= 1)
435  gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
436 
437  for (j = 2; j < 256; j += j)
438  for (k = 1; k < j; ++k)
439  be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
440 
441 out:
442  return t;
443 }
445 
447 {
448  u8 *ap = (u8 *)a;
449  be128 r[1];
450  int i = 15;
451 
452  *r = t->t[ap[15]];
453  while (i--) {
454  gf128mul_x8_lle(r);
455  be128_xor(r, r, &t->t[ap[i]]);
456  }
457  *a = *r;
458 }
460 
462 {
463  u8 *ap = (u8 *)a;
464  be128 r[1];
465  int i = 0;
466 
467  *r = t->t[ap[0]];
468  while (++i < 16) {
469  gf128mul_x8_bbe(r);
470  be128_xor(r, r, &t->t[ap[i]]);
471  }
472  *a = *r;
473 }
475 
476 MODULE_LICENSE("GPL");
477 MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");