30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 #ifndef OPENSSL_SYS_VMS
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
44 typedef __uint128_t uint128_t;
46 #error "Need GCC 3.1 or later to define type uint128_t"
65 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
83 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c,
84 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
85 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
86 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
87 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
88 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
89 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
90 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
92 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04,
93 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
94 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
95 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
96 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
97 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
98 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
99 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
101 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b,
102 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
103 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
104 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
105 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
106 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
107 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
108 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
131 static const limb bottom57bits = 0x1ffffffffffffff;
132 static const limb bottom58bits = 0x3ffffffffffffff;
136 static void bin66_to_felem(
felem out,
const u8 in[66])
138 out[0] = (*((
limb*) &in[0])) & bottom58bits;
139 out[1] = (*((
limb*) &in[7]) >> 2) & bottom58bits;
140 out[2] = (*((
limb*) &in[14]) >> 4) & bottom58bits;
141 out[3] = (*((
limb*) &in[21]) >> 6) & bottom58bits;
142 out[4] = (*((
limb*) &in[29])) & bottom58bits;
143 out[5] = (*((
limb*) &in[36]) >> 2) & bottom58bits;
144 out[6] = (*((
limb*) &in[43]) >> 4) & bottom58bits;
145 out[7] = (*((
limb*) &in[50]) >> 6) & bottom58bits;
146 out[8] = (*((
limb*) &in[58])) & bottom57bits;
151 static void felem_to_bin66(
u8 out[66],
const felem in)
154 (*((
limb*) &out[0])) = in[0];
155 (*((
limb*) &out[7])) |= in[1] << 2;
156 (*((
limb*) &out[14])) |= in[2] << 4;
157 (*((
limb*) &out[21])) |= in[3] << 6;
158 (*((
limb*) &out[29])) = in[4];
159 (*((
limb*) &out[36])) |= in[5] << 2;
160 (*((
limb*) &out[43])) |= in[6] << 4;
161 (*((
limb*) &out[50])) |= in[7] << 6;
162 (*((
limb*) &out[58])) = in[8];
166 static void flip_endian(
u8 *out,
const u8 *in,
unsigned len)
169 for (i = 0; i <
len; ++i)
170 out[i] = in[len-1-i];
174 static int BN_to_felem(
felem out,
const BIGNUM *bn)
181 memset(b_out, 0,
sizeof b_out);
183 if (num_bytes >
sizeof b_out)
194 flip_endian(b_out, b_in, num_bytes);
195 bin66_to_felem(out, b_out);
203 felem_to_bin66(b_in, in);
204 flip_endian(b_out, b_in,
sizeof b_out);
205 return BN_bin2bn(b_out,
sizeof b_out, out);
212 static void felem_one(
felem out)
225 static void felem_assign(
felem out,
const felem in)
239 static void felem_sum64(
felem out,
const felem in)
255 out[0] = in[0] * scalar;
256 out[1] = in[1] * scalar;
257 out[2] = in[2] * scalar;
258 out[3] = in[3] * scalar;
259 out[4] = in[4] * scalar;
260 out[5] = in[5] * scalar;
261 out[6] = in[6] * scalar;
262 out[7] = in[7] * scalar;
263 out[8] = in[8] * scalar;
267 static void felem_scalar64(
felem out,
limb scalar)
300 static void felem_neg(
felem out,
const felem in)
303 static const limb two62m3 = (((
limb)1) << 62) - (((
limb)1) << 5);
304 static const limb two62m2 = (((
limb)1) << 62) - (((
limb)1) << 4);
306 out[0] = two62m3 - in[0];
307 out[1] = two62m2 - in[1];
308 out[2] = two62m2 - in[2];
309 out[3] = two62m2 - in[3];
310 out[4] = two62m2 - in[4];
311 out[5] = two62m2 - in[5];
312 out[6] = two62m2 - in[6];
313 out[7] = two62m2 - in[7];
314 out[8] = two62m2 - in[8];
323 static void felem_diff64(
felem out,
const felem in)
326 static const limb two62m3 = (((
limb)1) << 62) - (((
limb)1) << 5);
327 static const limb two62m2 = (((
limb)1) << 62) - (((
limb)1) << 4);
329 out[0] += two62m3 - in[0];
330 out[1] += two62m2 - in[1];
331 out[2] += two62m2 - in[2];
332 out[3] += two62m2 - in[3];
333 out[4] += two62m2 - in[4];
334 out[5] += two62m2 - in[5];
335 out[6] += two62m2 - in[6];
336 out[7] += two62m2 - in[7];
337 out[8] += two62m2 - in[8];
349 static const limb two63m6 = (((
limb)1) << 62) - (((
limb)1) << 5);
350 static const limb two63m5 = (((
limb)1) << 62) - (((
limb)1) << 4);
352 out[0] += two63m6 - in[0];
353 out[1] += two63m5 - in[1];
354 out[2] += two63m5 - in[2];
355 out[3] += two63m5 - in[3];
356 out[4] += two63m5 - in[4];
357 out[5] += two63m5 - in[5];
358 out[6] += two63m5 - in[6];
359 out[7] += two63m5 - in[7];
360 out[8] += two63m5 - in[8];
372 static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
373 static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
375 out[0] += (two127m70 - in[0]);
376 out[1] += (two127m69 - in[1]);
377 out[2] += (two127m69 - in[2]);
378 out[3] += (two127m69 - in[3]);
379 out[4] += (two127m69 - in[4]);
380 out[5] += (two127m69 - in[5]);
381 out[6] += (two127m69 - in[6]);
382 out[7] += (two127m69 - in[7]);
383 out[8] += (two127m69 - in[8]);
395 felem_scalar(inx2, in, 2);
396 felem_scalar(inx4, in, 4);
407 out[0] = ((uint128_t) in[0]) * in[0];
408 out[1] = ((uint128_t) in[0]) * inx2[1];
409 out[2] = ((uint128_t) in[0]) * inx2[2] +
410 ((uint128_t) in[1]) * in[1];
411 out[3] = ((uint128_t) in[0]) * inx2[3] +
412 ((uint128_t) in[1]) * inx2[2];
413 out[4] = ((uint128_t) in[0]) * inx2[4] +
414 ((uint128_t) in[1]) * inx2[3] +
415 ((uint128_t) in[2]) * in[2];
416 out[5] = ((uint128_t) in[0]) * inx2[5] +
417 ((uint128_t) in[1]) * inx2[4] +
418 ((uint128_t) in[2]) * inx2[3];
419 out[6] = ((uint128_t) in[0]) * inx2[6] +
420 ((uint128_t) in[1]) * inx2[5] +
421 ((uint128_t) in[2]) * inx2[4] +
422 ((uint128_t) in[3]) * in[3];
423 out[7] = ((uint128_t) in[0]) * inx2[7] +
424 ((uint128_t) in[1]) * inx2[6] +
425 ((uint128_t) in[2]) * inx2[5] +
426 ((uint128_t) in[3]) * inx2[4];
427 out[8] = ((uint128_t) in[0]) * inx2[8] +
428 ((uint128_t) in[1]) * inx2[7] +
429 ((uint128_t) in[2]) * inx2[6] +
430 ((uint128_t) in[3]) * inx2[5] +
431 ((uint128_t) in[4]) * in[4];
441 out[0] += ((uint128_t) in[1]) * inx4[8] +
442 ((uint128_t) in[2]) * inx4[7] +
443 ((uint128_t) in[3]) * inx4[6] +
444 ((uint128_t) in[4]) * inx4[5];
447 out[1] += ((uint128_t) in[2]) * inx4[8] +
448 ((uint128_t) in[3]) * inx4[7] +
449 ((uint128_t) in[4]) * inx4[6] +
450 ((uint128_t) in[5]) * inx2[5];
453 out[2] += ((uint128_t) in[3]) * inx4[8] +
454 ((uint128_t) in[4]) * inx4[7] +
455 ((uint128_t) in[5]) * inx4[6];
458 out[3] += ((uint128_t) in[4]) * inx4[8] +
459 ((uint128_t) in[5]) * inx4[7] +
460 ((uint128_t) in[6]) * inx2[6];
463 out[4] += ((uint128_t) in[5]) * inx4[8] +
464 ((uint128_t) in[6]) * inx4[7];
467 out[5] += ((uint128_t) in[6]) * inx4[8] +
468 ((uint128_t) in[7]) * inx2[7];
471 out[6] += ((uint128_t) in[7]) * inx4[8];
474 out[7] += ((uint128_t) in[8]) * inx2[8];
487 felem_scalar(in2x2, in2, 2);
489 out[0] = ((uint128_t) in1[0]) * in2[0];
491 out[1] = ((uint128_t) in1[0]) * in2[1] +
492 ((uint128_t) in1[1]) * in2[0];
494 out[2] = ((uint128_t) in1[0]) * in2[2] +
495 ((uint128_t) in1[1]) * in2[1] +
496 ((uint128_t) in1[2]) * in2[0];
498 out[3] = ((uint128_t) in1[0]) * in2[3] +
499 ((uint128_t) in1[1]) * in2[2] +
500 ((uint128_t) in1[2]) * in2[1] +
501 ((uint128_t) in1[3]) * in2[0];
503 out[4] = ((uint128_t) in1[0]) * in2[4] +
504 ((uint128_t) in1[1]) * in2[3] +
505 ((uint128_t) in1[2]) * in2[2] +
506 ((uint128_t) in1[3]) * in2[1] +
507 ((uint128_t) in1[4]) * in2[0];
509 out[5] = ((uint128_t) in1[0]) * in2[5] +
510 ((uint128_t) in1[1]) * in2[4] +
511 ((uint128_t) in1[2]) * in2[3] +
512 ((uint128_t) in1[3]) * in2[2] +
513 ((uint128_t) in1[4]) * in2[1] +
514 ((uint128_t) in1[5]) * in2[0];
516 out[6] = ((uint128_t) in1[0]) * in2[6] +
517 ((uint128_t) in1[1]) * in2[5] +
518 ((uint128_t) in1[2]) * in2[4] +
519 ((uint128_t) in1[3]) * in2[3] +
520 ((uint128_t) in1[4]) * in2[2] +
521 ((uint128_t) in1[5]) * in2[1] +
522 ((uint128_t) in1[6]) * in2[0];
524 out[7] = ((uint128_t) in1[0]) * in2[7] +
525 ((uint128_t) in1[1]) * in2[6] +
526 ((uint128_t) in1[2]) * in2[5] +
527 ((uint128_t) in1[3]) * in2[4] +
528 ((uint128_t) in1[4]) * in2[3] +
529 ((uint128_t) in1[5]) * in2[2] +
530 ((uint128_t) in1[6]) * in2[1] +
531 ((uint128_t) in1[7]) * in2[0];
533 out[8] = ((uint128_t) in1[0]) * in2[8] +
534 ((uint128_t) in1[1]) * in2[7] +
535 ((uint128_t) in1[2]) * in2[6] +
536 ((uint128_t) in1[3]) * in2[5] +
537 ((uint128_t) in1[4]) * in2[4] +
538 ((uint128_t) in1[5]) * in2[3] +
539 ((uint128_t) in1[6]) * in2[2] +
540 ((uint128_t) in1[7]) * in2[1] +
541 ((uint128_t) in1[8]) * in2[0];
545 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
546 ((uint128_t) in1[2]) * in2x2[7] +
547 ((uint128_t) in1[3]) * in2x2[6] +
548 ((uint128_t) in1[4]) * in2x2[5] +
549 ((uint128_t) in1[5]) * in2x2[4] +
550 ((uint128_t) in1[6]) * in2x2[3] +
551 ((uint128_t) in1[7]) * in2x2[2] +
552 ((uint128_t) in1[8]) * in2x2[1];
554 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
555 ((uint128_t) in1[3]) * in2x2[7] +
556 ((uint128_t) in1[4]) * in2x2[6] +
557 ((uint128_t) in1[5]) * in2x2[5] +
558 ((uint128_t) in1[6]) * in2x2[4] +
559 ((uint128_t) in1[7]) * in2x2[3] +
560 ((uint128_t) in1[8]) * in2x2[2];
562 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
563 ((uint128_t) in1[4]) * in2x2[7] +
564 ((uint128_t) in1[5]) * in2x2[6] +
565 ((uint128_t) in1[6]) * in2x2[5] +
566 ((uint128_t) in1[7]) * in2x2[4] +
567 ((uint128_t) in1[8]) * in2x2[3];
569 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
570 ((uint128_t) in1[5]) * in2x2[7] +
571 ((uint128_t) in1[6]) * in2x2[6] +
572 ((uint128_t) in1[7]) * in2x2[5] +
573 ((uint128_t) in1[8]) * in2x2[4];
575 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
576 ((uint128_t) in1[6]) * in2x2[7] +
577 ((uint128_t) in1[7]) * in2x2[6] +
578 ((uint128_t) in1[8]) * in2x2[5];
580 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
581 ((uint128_t) in1[7]) * in2x2[7] +
582 ((uint128_t) in1[8]) * in2x2[6];
584 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
585 ((uint128_t) in1[8]) * in2x2[7];
587 out[7] += ((uint128_t) in1[8]) * in2x2[8];
590 static const limb bottom52bits = 0xfffffffffffff;
600 u64 overflow1, overflow2;
602 out[0] = ((
limb) in[0]) & bottom58bits;
603 out[1] = ((
limb) in[1]) & bottom58bits;
604 out[2] = ((
limb) in[2]) & bottom58bits;
605 out[3] = ((
limb) in[3]) & bottom58bits;
606 out[4] = ((
limb) in[4]) & bottom58bits;
607 out[5] = ((
limb) in[5]) & bottom58bits;
608 out[6] = ((
limb) in[6]) & bottom58bits;
609 out[7] = ((
limb) in[7]) & bottom58bits;
610 out[8] = ((
limb) in[8]) & bottom58bits;
614 out[1] += ((
limb) in[0]) >> 58;
615 out[1] += (((
limb) (in[0] >> 64)) & bottom52bits) << 6;
618 out[2] += ((
limb) (in[0] >> 64)) >> 52;
620 out[2] += ((
limb) in[1]) >> 58;
621 out[2] += (((
limb) (in[1] >> 64)) & bottom52bits) << 6;
622 out[3] += ((
limb) (in[1] >> 64)) >> 52;
624 out[3] += ((
limb) in[2]) >> 58;
625 out[3] += (((
limb) (in[2] >> 64)) & bottom52bits) << 6;
626 out[4] += ((
limb) (in[2] >> 64)) >> 52;
628 out[4] += ((
limb) in[3]) >> 58;
629 out[4] += (((
limb) (in[3] >> 64)) & bottom52bits) << 6;
630 out[5] += ((
limb) (in[3] >> 64)) >> 52;
632 out[5] += ((
limb) in[4]) >> 58;
633 out[5] += (((
limb) (in[4] >> 64)) & bottom52bits) << 6;
634 out[6] += ((
limb) (in[4] >> 64)) >> 52;
636 out[6] += ((
limb) in[5]) >> 58;
637 out[6] += (((
limb) (in[5] >> 64)) & bottom52bits) << 6;
638 out[7] += ((
limb) (in[5] >> 64)) >> 52;
640 out[7] += ((
limb) in[6]) >> 58;
641 out[7] += (((
limb) (in[6] >> 64)) & bottom52bits) << 6;
642 out[8] += ((
limb) (in[6] >> 64)) >> 52;
644 out[8] += ((
limb) in[7]) >> 58;
645 out[8] += (((
limb) (in[7] >> 64)) & bottom52bits) << 6;
648 overflow1 = ((
limb) (in[7] >> 64)) >> 52;
650 overflow1 += ((
limb) in[8]) >> 58;
651 overflow1 += (((
limb) (in[8] >> 64)) & bottom52bits) << 6;
652 overflow2 = ((
limb) (in[8] >> 64)) >> 52;
660 out[1] += out[0] >> 58; out[0] &= bottom58bits;
666 static void felem_square_reduce(
felem out,
const felem in)
669 felem_square(tmp, in);
670 felem_reduce(out, tmp);
673 static void felem_mul_reduce(
felem out,
const felem in1,
const felem in2)
676 felem_mul(tmp, in1, in2);
677 felem_reduce(out, tmp);
687 static void felem_inv(
felem out,
const felem in)
689 felem ftmp, ftmp2, ftmp3, ftmp4;
693 felem_square(tmp, in); felem_reduce(ftmp, tmp);
694 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);
695 felem_assign(ftmp2, ftmp);
696 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
697 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);
698 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
700 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp);
701 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
702 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
704 felem_assign(ftmp2, ftmp3);
705 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
706 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
707 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
708 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
709 felem_assign(ftmp4, ftmp3);
710 felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp);
711 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
712 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
713 felem_assign(ftmp2, ftmp3);
715 for (i = 0; i < 8; i++)
717 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
719 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
720 felem_assign(ftmp2, ftmp3);
722 for (i = 0; i < 16; i++)
724 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
726 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
727 felem_assign(ftmp2, ftmp3);
729 for (i = 0; i < 32; i++)
731 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
733 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
734 felem_assign(ftmp2, ftmp3);
736 for (i = 0; i < 64; i++)
738 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
740 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
741 felem_assign(ftmp2, ftmp3);
743 for (i = 0; i < 128; i++)
745 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
747 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
748 felem_assign(ftmp2, ftmp3);
750 for (i = 0; i < 256; i++)
752 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
754 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp);
756 for (i = 0; i < 9; i++)
758 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
760 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp);
761 felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp);
765 static const felem kPrime =
767 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
768 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
769 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
777 static limb felem_is_zero(
const felem in)
781 felem_assign(ftmp, in);
783 ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
785 ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
786 ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
787 ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
788 ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
789 ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
790 ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
791 ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
792 ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
813 is_zero = ((
s64) is_zero) >> 63;
815 is_p = ftmp[0] ^ kPrime[0];
816 is_p |= ftmp[1] ^ kPrime[1];
817 is_p |= ftmp[2] ^ kPrime[2];
818 is_p |= ftmp[3] ^ kPrime[3];
819 is_p |= ftmp[4] ^ kPrime[4];
820 is_p |= ftmp[5] ^ kPrime[5];
821 is_p |= ftmp[6] ^ kPrime[6];
822 is_p |= ftmp[7] ^ kPrime[7];
823 is_p |= ftmp[8] ^ kPrime[8];
826 is_p = ((
s64) is_p) >> 63;
832 static int felem_is_zero_int(
const felem in)
834 return (
int) (felem_is_zero(in) & ((
limb)1));
841 static void felem_contract(
felem out,
const felem in)
843 limb is_p, is_greater, sign;
844 static const limb two58 = ((
limb)1) << 58;
846 felem_assign(out, in);
848 out[0] += out[8] >> 57; out[8] &= bottom57bits;
850 out[1] += out[0] >> 58; out[0] &= bottom58bits;
851 out[2] += out[1] >> 58; out[1] &= bottom58bits;
852 out[3] += out[2] >> 58; out[2] &= bottom58bits;
853 out[4] += out[3] >> 58; out[3] &= bottom58bits;
854 out[5] += out[4] >> 58; out[4] &= bottom58bits;
855 out[6] += out[5] >> 58; out[5] &= bottom58bits;
856 out[7] += out[6] >> 58; out[6] &= bottom58bits;
857 out[8] += out[7] >> 58; out[7] &= bottom58bits;
866 is_p = out[0] ^ kPrime[0];
867 is_p |= out[1] ^ kPrime[1];
868 is_p |= out[2] ^ kPrime[2];
869 is_p |= out[3] ^ kPrime[3];
870 is_p |= out[4] ^ kPrime[4];
871 is_p |= out[5] ^ kPrime[5];
872 is_p |= out[6] ^ kPrime[6];
873 is_p |= out[7] ^ kPrime[7];
874 is_p |= out[8] ^ kPrime[8];
883 is_p = ((
s64) is_p) >> 63;
900 is_greater = out[8] >> 57;
901 is_greater |= is_greater << 32;
902 is_greater |= is_greater << 16;
903 is_greater |= is_greater << 8;
904 is_greater |= is_greater << 4;
905 is_greater |= is_greater << 2;
906 is_greater |= is_greater << 1;
907 is_greater = ((
s64) is_greater) >> 63;
909 out[0] -= kPrime[0] & is_greater;
910 out[1] -= kPrime[1] & is_greater;
911 out[2] -= kPrime[2] & is_greater;
912 out[3] -= kPrime[3] & is_greater;
913 out[4] -= kPrime[4] & is_greater;
914 out[5] -= kPrime[5] & is_greater;
915 out[6] -= kPrime[6] & is_greater;
916 out[7] -= kPrime[7] & is_greater;
917 out[8] -= kPrime[8] & is_greater;
920 sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
921 sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
922 sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
923 sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
924 sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
925 sign = -(out[0] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
926 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
927 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
928 sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
929 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
930 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
952 felem delta, gamma, beta, alpha, ftmp, ftmp2;
954 felem_assign(ftmp, x_in);
955 felem_assign(ftmp2, x_in);
958 felem_square(tmp, z_in);
959 felem_reduce(delta, tmp);
962 felem_square(tmp, y_in);
963 felem_reduce(gamma, tmp);
966 felem_mul(tmp, x_in, gamma);
967 felem_reduce(beta, tmp);
970 felem_diff64(ftmp, delta);
972 felem_sum64(ftmp2, delta);
974 felem_scalar64(ftmp2, 3);
976 felem_mul(tmp, ftmp, ftmp2);
982 felem_reduce(alpha, tmp);
985 felem_square(tmp, alpha);
988 felem_assign(ftmp, beta);
989 felem_scalar64(ftmp, 8);
991 felem_diff_128_64(tmp, ftmp);
993 felem_reduce(x_out, tmp);
996 felem_sum64(delta, gamma);
998 felem_assign(ftmp, y_in);
999 felem_sum64(ftmp, z_in);
1001 felem_square(tmp, ftmp);
1004 felem_diff_128_64(tmp, delta);
1006 felem_reduce(z_out, tmp);
1009 felem_scalar64(beta, 4);
1011 felem_diff64(beta, x_out);
1013 felem_mul(tmp, alpha, beta);
1018 felem_square(tmp2, gamma);
1021 felem_scalar128(tmp2, 8);
1025 felem_diff128(tmp, tmp2);
1030 felem_reduce(y_out, tmp);
1038 for (i = 0; i <
NLIMBS; ++i)
1040 const limb tmp = mask & (in[i] ^ out[i]);
1059 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1061 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1063 z1_is_zero = felem_is_zero(z1);
1064 z2_is_zero = felem_is_zero(z2);
1067 felem_square(tmp, z1);
1068 felem_reduce(ftmp, tmp);
1073 felem_square(tmp, z2);
1074 felem_reduce(ftmp2, tmp);
1077 felem_mul(tmp, x1, ftmp2);
1078 felem_reduce(ftmp3, tmp);
1081 felem_assign(ftmp5, z1);
1082 felem_sum64(ftmp5, z2);
1086 felem_square(tmp, ftmp5);
1088 felem_diff_128_64(tmp, ftmp);
1090 felem_diff_128_64(tmp, ftmp2);
1092 felem_reduce(ftmp5, tmp);
1095 felem_mul(tmp, ftmp2, z2);
1096 felem_reduce(ftmp2, tmp);
1099 felem_mul(tmp, y1, ftmp2);
1100 felem_reduce(ftmp6, tmp);
1107 felem_assign(ftmp3, x1);
1110 felem_scalar(ftmp5, z1, 2);
1113 felem_assign(ftmp6, y1);
1117 felem_mul(tmp, x2, ftmp);
1121 felem_diff_128_64(tmp, ftmp3);
1123 felem_reduce(ftmp4, tmp);
1125 x_equal = felem_is_zero(ftmp4);
1128 felem_mul(tmp, ftmp5, ftmp4);
1129 felem_reduce(z_out, tmp);
1132 felem_mul(tmp, ftmp, z1);
1133 felem_reduce(ftmp, tmp);
1136 felem_mul(tmp, y2, ftmp);
1140 felem_diff_128_64(tmp, ftmp6);
1142 felem_reduce(ftmp5, tmp);
1143 y_equal = felem_is_zero(ftmp5);
1144 felem_scalar64(ftmp5, 2);
1147 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1149 point_double(x3, y3, z3, x1, y1, z1);
1154 felem_assign(ftmp, ftmp4);
1155 felem_scalar64(ftmp, 2);
1157 felem_square(tmp, ftmp);
1159 felem_reduce(ftmp, tmp);
1162 felem_mul(tmp, ftmp4, ftmp);
1163 felem_reduce(ftmp2, tmp);
1166 felem_mul(tmp, ftmp3, ftmp);
1167 felem_reduce(ftmp4, tmp);
1170 felem_square(tmp, ftmp5);
1172 felem_diff_128_64(tmp, ftmp2);
1174 felem_assign(ftmp3, ftmp4);
1175 felem_scalar64(ftmp4, 2);
1177 felem_diff_128_64(tmp, ftmp4);
1179 felem_reduce(x_out, tmp);
1182 felem_diff64(ftmp3, x_out);
1185 felem_mul(tmp, ftmp5, ftmp3);
1187 felem_mul(tmp2, ftmp6, ftmp2);
1189 felem_scalar128(tmp2, 2);
1191 felem_diff128(tmp, tmp2);
1195 felem_reduce(y_out, tmp);
1197 copy_conditional(x_out, x2, z1_is_zero);
1198 copy_conditional(x_out, x1, z2_is_zero);
1199 copy_conditional(y_out, y2, z1_is_zero);
1200 copy_conditional(y_out, y1, z2_is_zero);
1201 copy_conditional(z_out, z2, z1_is_zero);
1202 copy_conditional(z_out, z1, z2_is_zero);
1203 felem_assign(x3, x_out);
1204 felem_assign(y3, y_out);
1205 felem_assign(z3, z_out);
1242 static const felem gmul[16][3] =
1243 {{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1244 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1245 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1246 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1247 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1248 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1249 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1250 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1251 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1252 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1253 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1254 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1255 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1256 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1257 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1258 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1259 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1260 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1261 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1262 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1263 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1264 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1265 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1266 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1267 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1268 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1269 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1270 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1271 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1272 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1273 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1274 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1275 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1276 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1277 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1278 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1279 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1280 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1281 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1282 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1283 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1284 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1285 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1286 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1287 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1288 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1289 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1290 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1291 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1292 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1293 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1294 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1295 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1296 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1297 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1298 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1299 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1300 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1301 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1302 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1303 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1304 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1305 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1306 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1307 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1308 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1309 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1310 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1311 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1312 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1313 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1314 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1315 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1316 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1317 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1318 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1319 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1320 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1321 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1322 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1323 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1324 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1325 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1326 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1327 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1328 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1329 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1330 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1331 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1332 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1333 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1334 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1335 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1336 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1337 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1338 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1339 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1340 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1341 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1342 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1343 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1344 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1345 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1346 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1347 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1348 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1349 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1350 {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1354 static void select_point(
const limb idx,
unsigned int size,
const felem pre_comp[][3],
1358 limb *outlimbs = &out[0][0];
1359 memset(outlimbs, 0, 3 *
sizeof(
felem));
1361 for (i = 0; i < size; i++)
1363 const limb *inlimbs = &pre_comp[i][0][0];
1364 limb mask = i ^ idx;
1370 for (j = 0; j <
NLIMBS * 3; j++)
1371 outlimbs[j] |= inlimbs[j] & mask;
1380 return (in[i >> 3] >> (i & 7)) & 1;
1390 const int mixed,
const felem pre_comp[][17][3],
const felem g_pre_comp[16][3])
1393 unsigned num, gen_mul = (g_scalar != NULL);
1394 felem nq[3], tmp[4];
1399 memset(nq, 0, 3 *
sizeof(
felem));
1406 for (i = (num_points ? 520 : 130); i >= 0; --i)
1410 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1413 if (gen_mul && (i <= 130))
1415 bits = get_bit(g_scalar, i + 390) << 3;
1418 bits |= get_bit(g_scalar, i + 260) << 2;
1419 bits |= get_bit(g_scalar, i + 130) << 1;
1420 bits |= get_bit(g_scalar, i);
1423 select_point(bits, 16, g_pre_comp, tmp);
1426 point_add(nq[0], nq[1], nq[2],
1427 nq[0], nq[1], nq[2],
1428 1 , tmp[0], tmp[1], tmp[2]);
1432 memcpy(nq, tmp, 3 *
sizeof(
felem));
1438 if (num_points && (i % 5 == 0))
1441 for (num = 0; num < num_points; ++
num)
1443 bits = get_bit(scalars[num], i + 4) << 5;
1444 bits |= get_bit(scalars[num], i + 3) << 4;
1445 bits |= get_bit(scalars[num], i + 2) << 3;
1446 bits |= get_bit(scalars[num], i + 1) << 2;
1447 bits |= get_bit(scalars[num], i) << 1;
1448 bits |= get_bit(scalars[num], i - 1);
1452 select_point(digit, 17, pre_comp[num], tmp);
1453 felem_neg(tmp[3], tmp[1]);
1454 copy_conditional(tmp[1], tmp[3], (-(
limb) sign));
1458 point_add(nq[0], nq[1], nq[2],
1459 nq[0], nq[1], nq[2],
1460 mixed, tmp[0], tmp[1], tmp[2]);
1464 memcpy(nq, tmp, 3 *
sizeof(
felem));
1470 felem_assign(x_out, nq[0]);
1471 felem_assign(y_out, nq[1]);
1472 felem_assign(z_out, nq[2]);
1547 static void *nistp521_pre_comp_dup(
void *src_)
1557 static void nistp521_pre_comp_free(
void *pre_)
1572 static void nistp521_pre_comp_clear_free(
void *pre_)
1605 BIGNUM *curve_p, *curve_a, *curve_b;
1608 if ((ctx = new_ctx =
BN_CTX_new()) == NULL)
return 0;
1612 ((curve_b =
BN_CTX_get(ctx)) == NULL))
goto err;
1627 if (new_ctx != NULL)
1637 felem z1, z2, x_in, y_in, x_out, y_out;
1646 if ((!BN_to_felem(x_in, &point->
X)) || (!BN_to_felem(y_in, &point->
Y)) ||
1647 (!BN_to_felem(z1, &point->
Z)))
return 0;
1649 felem_square(tmp, z2); felem_reduce(z1, tmp);
1650 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1651 felem_contract(x_out, x_in);
1654 if (!felem_to_BN(x, x_out))
1660 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1661 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1662 felem_contract(y_out, y_in);
1665 if (!felem_to_BN(y, y_out))
1674 static void make_points_affine(
size_t num,
felem points[][3],
felem tmp_felems[])
1683 (
void (*)(
void *)) felem_one,
1684 (
int (*)(
const void *)) felem_is_zero_int,
1685 (
void (*)(
void *,
const void *)) felem_assign,
1686 (
void (*)(
void *,
const void *)) felem_square_reduce,
1687 (
void (*)(
void *,
const void *,
const void *)) felem_mul_reduce,
1688 (
void (*)(
void *,
const void *)) felem_inv,
1689 (
void (*)(
void *,
const void *)) felem_contract);
1702 BIGNUM *x, *y, *z, *tmp_scalar;
1705 felem (*pre_comp)[17][3] = NULL;
1706 felem *tmp_felems = NULL;
1708 unsigned i, num_bytes;
1709 int have_pre_comp = 0;
1710 size_t num_points =
num;
1711 felem x_in, y_in, z_in, x_out, y_out, z_out;
1713 felem (*g_pre_comp)[3] = NULL;
1716 const BIGNUM *p_scalar = NULL;
1719 if ((ctx = new_ctx =
BN_CTX_new()) == NULL)
return 0;
1730 nistp521_pre_comp_dup, nistp521_pre_comp_free,
1731 nistp521_pre_comp_clear_free);
1737 g_pre_comp = (
felem (*)[3]) gmul;
1739 if (generator == NULL)
1742 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1743 !felem_to_BN(y, g_pre_comp[1][1]) ||
1744 !felem_to_BN(z, g_pre_comp[1][2]))
1750 generator, x, y, z, ctx))
1763 if (num_points >= 2)
1773 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1782 memset(pre_comp, 0, num_points * 17 * 3 *
sizeof(
felem));
1783 for (i = 0; i < num_points; ++i)
1796 p_scalar = scalars[i];
1798 if ((p_scalar != NULL) && (p != NULL))
1814 flip_endian(secrets[i], tmp, num_bytes);
1816 if ((!BN_to_felem(x_out, &p->
X)) ||
1817 (!BN_to_felem(y_out, &p->
Y)) ||
1818 (!BN_to_felem(z_out, &p->
Z)))
goto err;
1819 memcpy(pre_comp[i][1][0], x_out,
sizeof(
felem));
1820 memcpy(pre_comp[i][1][1], y_out,
sizeof(
felem));
1821 memcpy(pre_comp[i][1][2], z_out,
sizeof(
felem));
1822 for (j = 2; j <= 16; ++j)
1827 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1828 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1829 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1834 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1835 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1841 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1845 if ((scalar != NULL) && (have_pre_comp))
1847 memset(g_secret, 0,
sizeof(g_secret));
1862 flip_endian(g_secret, tmp, num_bytes);
1864 batch_mul(x_out, y_out, z_out,
1867 mixed, (
const felem (*)[17][3]) pre_comp,
1868 (
const felem (*)[3]) g_pre_comp);
1872 batch_mul(x_out, y_out, z_out,
1874 NULL, mixed, (
const felem (*)[17][3]) pre_comp, NULL);
1876 felem_contract(x_in, x_out);
1877 felem_contract(y_in, y_out);
1878 felem_contract(z_in, z_out);
1879 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1880 (!felem_to_BN(z, z_in)))
1889 if (generator != NULL)
1891 if (new_ctx != NULL)
1893 if (secrets != NULL)
1895 if (pre_comp != NULL)
1897 if (tmp_felems != NULL)
1910 felem tmp_felems[16];
1914 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
1916 if ((ctx = new_ctx =
BN_CTX_new()) == NULL)
return 0;
1924 if (generator == NULL)
1930 if ((pre = nistp521_pre_comp_new()) == NULL)
1944 for (i = 1; i <= 4; i <<= 1)
1949 for (j = 0; j < 129; ++j)
1986 for (i = 1; i < 8; ++i)
1995 make_points_affine(15, &(pre->
g_pre_comp[1]), tmp_felems);
1998 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
2004 if (generator != NULL)
2006 if (new_ctx != NULL)
2009 nistp521_pre_comp_free(pre);
2016 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2024 static void *dummy=&dummy;