30 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
32 #ifndef OPENSSL_SYS_VMS
42 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
44 typedef __uint128_t uint128_t;
45 typedef __int128_t int128_t;
47 #error "Need GCC 3.1 or later to define type uint128_t"
65 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
66 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
67 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
69 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
71 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
73 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
74 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
75 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
76 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
77 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47,
78 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
79 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
80 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
81 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b,
82 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
83 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
84 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
115 static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
116 static const limb bottom32bits = 0xffffffff;
117 static const u64 bottom63bits = 0x7ffffffffffffffful;
121 static void bin32_to_felem(
felem out,
const u8 in[32])
123 out[0] = *((
u64*) &in[0]);
124 out[1] = *((
u64*) &in[8]);
125 out[2] = *((
u64*) &in[16]);
126 out[3] = *((
u64*) &in[24]);
131 static void smallfelem_to_bin32(
u8 out[32],
const smallfelem in)
133 *((
u64*) &out[0]) = in[0];
134 *((
u64*) &out[8]) = in[1];
135 *((
u64*) &out[16]) = in[2];
136 *((
u64*) &out[24]) = in[3];
140 static void flip_endian(
u8 *out,
const u8 *in,
unsigned len)
143 for (i = 0; i <
len; ++i)
144 out[i] = in[len-1-i];
148 static int BN_to_felem(
felem out,
const BIGNUM *bn)
155 memset(b_out, 0,
sizeof b_out);
157 if (num_bytes >
sizeof b_out)
168 flip_endian(b_out, b_in, num_bytes);
169 bin32_to_felem(out, b_out);
177 smallfelem_to_bin32(b_in, in);
178 flip_endian(b_out, b_in,
sizeof b_out);
179 return BN_bin2bn(b_out,
sizeof b_out, out);
202 static void felem_assign(
felem out,
const felem in)
211 static void felem_sum(
felem out,
const felem in)
229 static void felem_scalar(
felem out,
const u64 scalar)
238 static void longfelem_scalar(
longfelem out,
const u64 scalar)
/*
 * Limb constants named after the powers of two they combine:
 *   two105m41m9 = 2^105 - 2^41 - 2^9
 *   two105      = 2^105
 *   two105m41p9 = 2^105 - 2^41 + 2^9
 * Each expansion is fully parenthesized so the macro evaluates as a single
 * value wherever it is used (e.g. next to '*', unary or binary '-').
 */
#define two105m41m9 ((((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9))
#define two105 (((limb)1) << 105)
#define two105m41p9 ((((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9))
264 out[0] = zero105[0] - small[0];
265 out[1] = zero105[1] - small[1];
266 out[2] = zero105[2] - small[2];
267 out[3] = zero105[3] - small[3];
276 static void felem_diff(
felem out,
const felem in)
279 out[0] += zero105[0];
280 out[1] += zero105[1];
281 out[2] += zero105[2];
282 out[3] += zero105[3];
/*
 * Limb constants named after the powers of two they combine:
 *   two107m43m11 = 2^107 - 2^43 - 2^11
 *   two107       = 2^107
 *   two107m43p11 = 2^107 - 2^43 + 2^11
 * Each expansion is fully parenthesized so the macro evaluates as a single
 * value wherever it is used (e.g. next to '*', unary or binary '-').
 */
#define two107m43m11 ((((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11))
#define two107 (((limb)1) << 107)
#define two107m43p11 ((((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11))
304 static void felem_diff_zero107(
felem out,
const felem in)
307 out[0] += zero107[0];
308 out[1] += zero107[1];
309 out[2] += zero107[2];
310 out[3] += zero107[3];
326 static const limb two70m8p6 = (((
limb)1) << 70) - (((
limb)1) << 8) + (((
limb)1) << 6);
327 static const limb two70p40 = (((
limb)1) << 70) + (((
limb)1) << 40);
328 static const limb two70 = (((
limb)1) << 70);
329 static const limb two70m40m38p6 = (((
limb)1) << 70) - (((
limb)1) << 40) - (((
limb)1) << 38) + (((
limb)1) << 6);
330 static const limb two70m6 = (((
limb)1) << 70) - (((
limb)1) << 6);
336 out[3] += two70m40m38p6;
/*
 * Limb constants named after the powers of two they combine:
 *   two64m0     = 2^64 - 1
 *   two110p32m0 = 2^110 + 2^32 - 1
 *   two64m46    = 2^64 - 2^46
 *   two64m32    = 2^64 - 2^32
 * Each expansion is fully parenthesized so the macro evaluates as a single
 * value wherever it is used (e.g. next to '*', unary or binary '-').
 */
#define two64m0 ((((limb)1) << 64) - 1)
#define two110p32m0 ((((limb)1) << 110) + (((limb)1) << 32) - 1)
#define two64m46 ((((limb)1) << 64) - (((limb)1) << 46))
#define two64m32 ((((limb)1) << 64) - (((limb)1) << 32))
374 static const u64 kPrime3Test = 0x7fffffff00000001ul;
377 tmp[3] = zero110[3] + in[3] + ((
u64) (in[2] >> 64));
380 tmp[2] = zero110[2] + (
u64) in[2];
381 tmp[0] = zero110[0] + in[0];
382 tmp[1] = zero110[1] + in[1];
389 tmp[3] = (
u64) tmp[3];
391 tmp[3] += ((
limb)a) << 32;
397 tmp[3] = (
u64) tmp[3];
399 tmp[3] += ((
limb)a) << 32;
405 tmp[1] -= (((
limb)b) << 32);
429 mask = (mask & low) | high;
430 tmp[0] -= mask & kPrime[0];
431 tmp[1] -= mask & kPrime[1];
433 tmp[3] -= mask & kPrime[3];
436 tmp[1] += ((
u64) (tmp[0] >> 64)); tmp[0] = (
u64) tmp[0];
437 tmp[2] += ((
u64) (tmp[1] >> 64)); tmp[1] = (
u64) tmp[1];
438 tmp[3] += ((
u64) (tmp[2] >> 64)); tmp[2] = (
u64) tmp[2];
467 a = ((uint128_t) small[0]) * small[0];
473 a = ((uint128_t) small[0]) * small[1];
480 a = ((uint128_t) small[0]) * small[2];
487 a = ((uint128_t) small[0]) * small[3];
493 a = ((uint128_t) small[1]) * small[2];
500 a = ((uint128_t) small[1]) * small[1];
506 a = ((uint128_t) small[1]) * small[3];
513 a = ((uint128_t) small[2]) * small[3];
521 a = ((uint128_t) small[2]) * small[2];
527 a = ((uint128_t) small[3]) * small[3];
543 felem_shrink(small, in);
544 smallfelem_square(out, small);
559 a = ((uint128_t) small1[0]) * small2[0];
566 a = ((uint128_t) small1[0]) * small2[1];
572 a = ((uint128_t) small1[1]) * small2[0];
579 a = ((uint128_t) small1[0]) * small2[2];
585 a = ((uint128_t) small1[1]) * small2[1];
591 a = ((uint128_t) small1[2]) * small2[0];
598 a = ((uint128_t) small1[0]) * small2[3];
604 a = ((uint128_t) small1[1]) * small2[2];
610 a = ((uint128_t) small1[2]) * small2[1];
616 a = ((uint128_t) small1[3]) * small2[0];
623 a = ((uint128_t) small1[1]) * small2[3];
629 a = ((uint128_t) small1[2]) * small2[2];
635 a = ((uint128_t) small1[3]) * small2[1];
642 a = ((uint128_t) small1[2]) * small2[3];
648 a = ((uint128_t) small1[3]) * small2[2];
655 a = ((uint128_t) small1[3]) * small2[3];
672 felem_shrink(small1, in1);
673 felem_shrink(small2, in2);
674 smallfelem_mul(out, small1, small2);
687 felem_shrink(small2, in2);
688 smallfelem_mul(out, small1, small2);
/*
 * Limb constants named after the powers of two they combine:
 *   two100m36m4 = 2^100 - 2^36 - 2^4
 *   two100      = 2^100
 *   two100m36p4 = 2^100 - 2^36 + 2^4
 * Each expansion is fully parenthesized so the macro evaluates as a single
 * value wherever it is used (e.g. next to '*', unary or binary '-').
 */
#define two100m36m4 ((((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4))
#define two100 (((limb)1) << 100)
#define two100m36p4 ((((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4))
714 c = in[4] + (in[5] << 32);
724 out[1] -= (in[4] << 32);
725 out[3] += (in[4] << 32);
728 out[2] -= (in[5] << 32);
732 out[0] -= (in[6] << 32);
733 out[1] += (in[6] << 33);
734 out[2] += (in[6] * 2);
735 out[3] -= (in[6] << 32);
739 out[0] -= (in[7] << 32);
740 out[2] += (in[7] << 33);
741 out[3] += (in[7] * 3);
754 out[0] = zero100[0] + in[0];
755 out[1] = zero100[1] + in[1];
756 out[2] = zero100[2] + in[2];
757 out[3] = zero100[3] + in[3];
759 felem_reduce_(out, in);
781 out[0] = zero105[0] + in[0];
782 out[1] = zero105[1] + in[1];
783 out[2] = zero105[2] + in[2];
784 out[3] = zero105[3] + in[3];
786 felem_reduce_(out, in);
802 static void subtract_u64(
u64* result,
u64* carry,
u64 v)
804 uint128_t r = *result;
806 *carry = (r >> 64) & 1;
817 u64 all_equal_so_far = 0, result = 0, carry;
819 felem_shrink(out, in);
827 for (i = 3; i < 4; i--)
830 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
833 result |= all_equal_so_far & ((
u64) (a >> 64));
837 equal = kPrime[i] ^ out[i];
839 equal &= equal << 32;
840 equal &= equal << 16;
845 equal = ((
s64) equal) >> 63;
847 all_equal_so_far &= equal;
852 result |= all_equal_so_far;
855 subtract_u64(&out[0], &carry, result & kPrime[0]);
856 subtract_u64(&out[1], &carry, carry);
857 subtract_u64(&out[2], &carry, carry);
858 subtract_u64(&out[3], &carry, carry);
860 subtract_u64(&out[1], &carry, result & kPrime[1]);
861 subtract_u64(&out[2], &carry, carry);
862 subtract_u64(&out[3], &carry, carry);
864 subtract_u64(&out[2], &carry, result & kPrime[2]);
865 subtract_u64(&out[3], &carry, carry);
867 subtract_u64(&out[3], &carry, result & kPrime[3]);
875 smallfelem_square(longtmp, in);
876 felem_reduce(tmp, longtmp);
877 felem_contract(out, tmp);
885 smallfelem_mul(longtmp, in1, in2);
886 felem_reduce(tmp, longtmp);
887 felem_contract(out, tmp);
900 u64 is_zero = small[0] | small[1] | small[2] | small[3];
902 is_zero &= is_zero << 32;
903 is_zero &= is_zero << 16;
904 is_zero &= is_zero << 8;
905 is_zero &= is_zero << 4;
906 is_zero &= is_zero << 2;
907 is_zero &= is_zero << 1;
908 is_zero = ((
s64) is_zero) >> 63;
910 is_p = (small[0] ^ kPrime[0]) |
911 (small[1] ^ kPrime[1]) |
912 (small[2] ^ kPrime[2]) |
913 (small[3] ^ kPrime[3]);
921 is_p = ((
s64) is_p) >> 63;
926 result |= ((
limb) is_zero) << 64;
930 static int smallfelem_is_zero_int(
const smallfelem small)
932 return (
int) (smallfelem_is_zero(small) & ((
limb)1));
/*
 * felem_inv writes to |out| a fixed power of |in|, built from a chain of
 * felem_square / felem_mul steps, each followed by felem_reduce.
 *
 * The exponent annotations below give the power of |in| held after each
 * step. They assume that square/mul followed by reduce yields the square /
 * product in the field, and that every for-loop body is exactly the single
 * squaring line that follows it (the loop-closing braces are not visible in
 * this excerpt) -- TODO confirm against the full file.
 *
 * Under those assumptions the final exponent is
 *   2^256 - 2^224 + 2^192 + 2^96 - 3,
 * which is two less than the value encoded by kPrime
 * (2^256 - 2^224 + 2^192 + 2^96 - 1).
 */
942 static void felem_inv(
felem out,
const felem in)
/*
 * e2, e4, e8, e16, e32 capture in^(2^k - 1) at the point they are assigned.
 * e64 is assigned after 32 further squarings of e32, so it holds
 * in^(2^64 - 2^32), not in^(2^64 - 1).
 */
946 felem e2, e4, e8, e16, e32, e64;
/* 2^1 */
950 felem_square(tmp, in); felem_reduce(ftmp, tmp);
/* 2^2 - 2^0 */
951 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp);
952 felem_assign(e2, ftmp);
/* 2^3 - 2^1 */
953 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
/* 2^4 - 2^2 */
954 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
/* 2^4 - 2^0 */
955 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp);
956 felem_assign(e4, ftmp);
/* four squarings: 2^8 - 2^4 */
957 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
958 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
959 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
960 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
/* 2^8 - 2^0 */
961 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp);
962 felem_assign(e8, ftmp);
/* eight squarings: 2^16 - 2^8 */
963 for (i = 0; i < 8; i++) {
964 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
/* 2^16 - 2^0 */
966 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp);
967 felem_assign(e16, ftmp);
/* sixteen squarings: 2^32 - 2^16 */
968 for (i = 0; i < 16; i++) {
969 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
/* 2^32 - 2^0 */
971 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp);
972 felem_assign(e32, ftmp);
/* thirty-two squarings: 2^64 - 2^32 */
973 for (i = 0; i < 32; i++) {
974 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
976 felem_assign(e64, ftmp);
/* 2^64 - 2^32 + 2^0 */
977 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp);
/* 192 squarings: 2^256 - 2^224 + 2^192 */
978 for (i = 0; i < 192; i++) {
979 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
/* Second accumulator ftmp2: (2^64 - 2^32) + (2^32 - 1) = 2^64 - 2^0 */
982 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp);
/* sixteen squarings: 2^80 - 2^16 */
983 for (i = 0; i < 16; i++) {
984 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
/* 2^80 - 2^0 */
986 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp);
/* eight squarings: 2^88 - 2^8 */
987 for (i = 0; i < 8; i++) {
988 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
/* 2^88 - 2^0 */
990 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp);
/* four squarings: 2^92 - 2^4 */
991 for (i = 0; i < 4; i++) {
992 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
/* 2^92 - 2^0 */
994 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp);
/* two squarings: 2^94 - 2^2 */
995 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
996 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
/* 2^94 - 2^0 */
997 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp);
/* two squarings: 2^96 - 2^2 */
998 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
999 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
/* 2^96 - 3 */
1000 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp);
/* Combine both accumulators: 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1002 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp);
1009 smallfelem_expand(tmp, in);
1010 felem_inv(tmp, tmp);
1011 felem_contract(out, tmp);
1033 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1036 felem_assign(ftmp, x_in);
1038 felem_assign(ftmp2, x_in);
1042 felem_square(tmp, z_in);
1043 felem_reduce(delta, tmp);
1047 felem_square(tmp, y_in);
1048 felem_reduce(gamma, tmp);
1050 felem_shrink(small1, gamma);
1053 felem_small_mul(tmp, small1, x_in);
1054 felem_reduce(beta, tmp);
1058 felem_diff(ftmp, delta);
1060 felem_sum(ftmp2, delta);
1062 felem_scalar(ftmp2, 3);
1064 felem_mul(tmp, ftmp, ftmp2);
1065 felem_reduce(alpha, tmp);
1067 felem_shrink(small2, alpha);
1070 smallfelem_square(tmp, small2);
1071 felem_reduce(x_out, tmp);
1072 felem_assign(ftmp, beta);
1073 felem_scalar(ftmp, 8);
1075 felem_diff(x_out, ftmp);
1079 felem_sum(delta, gamma);
1081 felem_assign(ftmp, y_in);
1082 felem_sum(ftmp, z_in);
1084 felem_square(tmp, ftmp);
1085 felem_reduce(z_out, tmp);
1086 felem_diff(z_out, delta);
1090 felem_scalar(beta, 4);
1092 felem_diff_zero107(beta, x_out);
1094 felem_small_mul(tmp, small2, beta);
1096 smallfelem_square(tmp2, small1);
1098 longfelem_scalar(tmp2, 8);
1100 longfelem_diff(tmp, tmp2);
1102 felem_reduce_zero105(y_out, tmp);
1112 felem felem_x_out, felem_y_out, felem_z_out;
1113 felem felem_x_in, felem_y_in, felem_z_in;
1115 smallfelem_expand(felem_x_in, x_in);
1116 smallfelem_expand(felem_y_in, y_in);
1117 smallfelem_expand(felem_z_in, z_in);
1118 point_double(felem_x_out, felem_y_out, felem_z_out,
1119 felem_x_in, felem_y_in, felem_z_in);
1120 felem_shrink(x_out, felem_x_out);
1121 felem_shrink(y_out, felem_y_out);
1122 felem_shrink(z_out, felem_z_out);
1130 for (i = 0; i <
NLIMBS; ++i)
1132 const limb tmp = mask & (in[i] ^ out[i]);
1142 const u64 mask64 = mask;
1143 for (i = 0; i <
NLIMBS; ++i)
1145 out[i] = ((
limb) (in[i] & mask64)) | (out[i] & ~mask);
1163 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1165 smallfelem small1, small2, small3, small4, small5;
1166 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1168 felem_shrink(small3, z1);
1170 z1_is_zero = smallfelem_is_zero(small3);
1171 z2_is_zero = smallfelem_is_zero(z2);
1174 smallfelem_square(tmp, small3);
1175 felem_reduce(ftmp, tmp);
1177 felem_shrink(small1, ftmp);
1182 smallfelem_square(tmp, z2);
1183 felem_reduce(ftmp2, tmp);
1185 felem_shrink(small2, ftmp2);
1187 felem_shrink(small5, x1);
1190 smallfelem_mul(tmp, small5, small2);
1191 felem_reduce(ftmp3, tmp);
1195 felem_assign(ftmp5, z1);
1196 felem_small_sum(ftmp5, z2);
1200 felem_square(tmp, ftmp5);
1201 felem_reduce(ftmp5, tmp);
1203 felem_sum(ftmp2, ftmp);
1205 felem_diff(ftmp5, ftmp2);
1209 smallfelem_mul(tmp, small2, z2);
1210 felem_reduce(ftmp2, tmp);
1213 felem_mul(tmp, y1, ftmp2);
1214 felem_reduce(ftmp6, tmp);
1222 felem_assign(ftmp3, x1);
1226 felem_assign(ftmp5, z1);
1227 felem_scalar(ftmp5, 2);
1231 felem_assign(ftmp6, y1);
1236 smallfelem_mul(tmp, x2, small1);
1237 felem_reduce(ftmp4, tmp);
1240 felem_diff_zero107(ftmp4, ftmp3);
1242 felem_shrink(small4, ftmp4);
1244 x_equal = smallfelem_is_zero(small4);
1247 felem_small_mul(tmp, small4, ftmp5);
1248 felem_reduce(z_out, tmp);
1252 smallfelem_mul(tmp, small1, small3);
1253 felem_reduce(ftmp, tmp);
1256 felem_small_mul(tmp, y2, ftmp);
1257 felem_reduce(ftmp5, tmp);
1260 felem_diff_zero107(ftmp5, ftmp6);
1262 felem_scalar(ftmp5, 2);
1264 felem_shrink(small1, ftmp5);
1265 y_equal = smallfelem_is_zero(small1);
1267 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1269 point_double(x3, y3, z3, x1, y1, z1);
1274 felem_assign(ftmp, ftmp4);
1275 felem_scalar(ftmp, 2);
1277 felem_square(tmp, ftmp);
1278 felem_reduce(ftmp, tmp);
1281 felem_mul(tmp, ftmp4, ftmp);
1282 felem_reduce(ftmp2, tmp);
1285 felem_mul(tmp, ftmp3, ftmp);
1286 felem_reduce(ftmp4, tmp);
1289 smallfelem_square(tmp, small1);
1290 felem_reduce(x_out, tmp);
1291 felem_assign(ftmp3, ftmp4);
1292 felem_scalar(ftmp4, 2);
1293 felem_sum(ftmp4, ftmp2);
1295 felem_diff(x_out, ftmp4);
1299 felem_diff_zero107(ftmp3, x_out);
1301 felem_small_mul(tmp, small1, ftmp3);
1302 felem_mul(tmp2, ftmp6, ftmp2);
1303 longfelem_scalar(tmp2, 2);
1305 longfelem_diff(tmp, tmp2);
1307 felem_reduce_zero105(y_out, tmp);
1310 copy_small_conditional(x_out, x2, z1_is_zero);
1311 copy_conditional(x_out, x1, z2_is_zero);
1312 copy_small_conditional(y_out, y2, z1_is_zero);
1313 copy_conditional(y_out, y1, z2_is_zero);
1314 copy_small_conditional(z_out, z2, z1_is_zero);
1315 copy_conditional(z_out, z1, z2_is_zero);
1316 felem_assign(x3, x_out);
1317 felem_assign(y3, y_out);
1318 felem_assign(z3, z_out);
1327 felem felem_x3, felem_y3, felem_z3;
1328 felem felem_x1, felem_y1, felem_z1;
1329 smallfelem_expand(felem_x1, x1);
1330 smallfelem_expand(felem_y1, y1);
1331 smallfelem_expand(felem_z1, z1);
1332 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1333 felem_shrink(x3, felem_x3);
1334 felem_shrink(y3, felem_y3);
1335 felem_shrink(z3, felem_z3);
1378 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1379 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1381 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1382 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1384 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1385 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1387 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1388 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1390 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1391 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1393 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1394 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1396 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1397 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1399 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1400 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1402 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1403 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1405 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1406 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1408 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1409 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1411 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1412 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1414 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1415 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1417 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1418 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1420 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1421 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1426 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1427 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1429 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1430 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1432 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1433 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1435 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1436 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1438 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1439 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1441 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1442 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1444 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1445 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1447 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1448 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1450 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1451 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1453 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1454 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1456 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1457 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1459 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1460 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1462 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1463 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1465 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1466 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1468 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1469 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1474 static void select_point(
const u64 idx,
unsigned int size,
const smallfelem pre_comp[16][3],
smallfelem out[3])
1477 u64 *outlimbs = &out[0][0];
1480 for (i = 0; i < size; i++)
1482 const u64 *inlimbs = (
u64*) &pre_comp[i][0][0];
1489 for (j = 0; j < NLIMBS * 3; j++)
1490 outlimbs[j] |= inlimbs[j] & mask;
1497 if ((i < 0) || (i >= 256))
1499 return (in[i >> 3] >> (i & 7)) & 1;
1512 unsigned num, gen_mul = (g_scalar != NULL);
1519 memset(nq, 0, 3 *
sizeof(
felem));
1526 for (i = (num_points ? 255 : 31); i >= 0; --i)
1530 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1533 if (gen_mul && (i <= 31))
1536 bits = get_bit(g_scalar, i + 224) << 3;
1537 bits |= get_bit(g_scalar, i + 160) << 2;
1538 bits |= get_bit(g_scalar, i + 96) << 1;
1539 bits |= get_bit(g_scalar, i + 32);
1541 select_point(bits, 16, g_pre_comp[1], tmp);
1545 point_add(nq[0], nq[1], nq[2],
1546 nq[0], nq[1], nq[2],
1547 1 , tmp[0], tmp[1], tmp[2]);
1551 smallfelem_expand(nq[0], tmp[0]);
1552 smallfelem_expand(nq[1], tmp[1]);
1553 smallfelem_expand(nq[2], tmp[2]);
1558 bits = get_bit(g_scalar, i + 192) << 3;
1559 bits |= get_bit(g_scalar, i + 128) << 2;
1560 bits |= get_bit(g_scalar, i + 64) << 1;
1561 bits |= get_bit(g_scalar, i);
1563 select_point(bits, 16, g_pre_comp[0], tmp);
1564 point_add(nq[0], nq[1], nq[2],
1565 nq[0], nq[1], nq[2],
1566 1 , tmp[0], tmp[1], tmp[2]);
1570 if (num_points && (i % 5 == 0))
1573 for (num = 0; num < num_points; ++
num)
1575 bits = get_bit(scalars[num], i + 4) << 5;
1576 bits |= get_bit(scalars[num], i + 3) << 4;
1577 bits |= get_bit(scalars[num], i + 2) << 3;
1578 bits |= get_bit(scalars[num], i + 1) << 2;
1579 bits |= get_bit(scalars[num], i) << 1;
1580 bits |= get_bit(scalars[num], i - 1);
1584 select_point(digit, 17, pre_comp[num], tmp);
1585 smallfelem_neg(ftmp, tmp[1]);
1586 copy_small_conditional(ftmp, tmp[1], (((
limb) sign) - 1));
1587 felem_contract(tmp[1], ftmp);
1591 point_add(nq[0], nq[1], nq[2],
1592 nq[0], nq[1], nq[2],
1593 mixed, tmp[0], tmp[1], tmp[2]);
1597 smallfelem_expand(nq[0], tmp[0]);
1598 smallfelem_expand(nq[1], tmp[1]);
1599 smallfelem_expand(nq[2], tmp[2]);
1605 felem_assign(x_out, nq[0]);
1606 felem_assign(y_out, nq[1]);
1607 felem_assign(z_out, nq[2]);
1680 static void *nistp256_pre_comp_dup(
void *src_)
1690 static void nistp256_pre_comp_free(
void *pre_)
1705 static void nistp256_pre_comp_clear_free(
void *pre_)
1738 BIGNUM *curve_p, *curve_a, *curve_b;
1741 if ((ctx = new_ctx =
BN_CTX_new()) == NULL)
return 0;
1745 ((curve_b =
BN_CTX_get(ctx)) == NULL))
goto err;
1760 if (new_ctx != NULL)
1770 felem z1, z2, x_in, y_in;
1780 if ((!BN_to_felem(x_in, &point->
X)) || (!BN_to_felem(y_in, &point->
Y)) ||
1781 (!BN_to_felem(z1, &point->
Z)))
return 0;
1783 felem_square(tmp, z2); felem_reduce(z1, tmp);
1784 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1785 felem_contract(x_out, x_in);
1788 if (!smallfelem_to_BN(x, x_out)) {
1794 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1795 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1796 felem_contract(y_out, y_in);
1799 if (!smallfelem_to_BN(y, y_out))
1809 static void make_points_affine(
size_t num,
smallfelem points[][3],
smallfelem tmp_smallfelems[])
1818 (
void (*)(
void *)) smallfelem_one,
1819 (
int (*)(
const void *)) smallfelem_is_zero_int,
1820 (
void (*)(
void *,
const void *)) smallfelem_assign,
1821 (
void (*)(
void *,
const void *)) smallfelem_square_contract,
1822 (
void (*)(
void *,
const void *,
const void *)) smallfelem_mul_contract,
1823 (
void (*)(
void *,
const void *)) smallfelem_inv_contract,
1824 (
void (*)(
void *,
const void *)) smallfelem_assign );
1837 BIGNUM *x, *y, *z, *tmp_scalar;
1843 unsigned i, num_bytes;
1844 int have_pre_comp = 0;
1845 size_t num_points =
num;
1847 felem x_out, y_out, z_out;
1849 const smallfelem (*g_pre_comp)[16][3] = NULL;
1852 const BIGNUM *p_scalar = NULL;
1855 if ((ctx = new_ctx =
BN_CTX_new()) == NULL)
return 0;
1866 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1867 nistp256_pre_comp_clear_free);
1873 g_pre_comp = &gmul[0];
1875 if (generator == NULL)
1878 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1879 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1880 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1886 generator, x, y, z, ctx))
1898 if (num_points >= 3)
1908 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1917 memset(pre_comp, 0, num_points * 17 * 3 *
sizeof(
smallfelem));
1918 for (i = 0; i < num_points; ++i)
1931 p_scalar = scalars[i];
1933 if ((p_scalar != NULL) && (p != NULL))
1949 flip_endian(secrets[i], tmp, num_bytes);
1951 if ((!BN_to_felem(x_out, &p->
X)) ||
1952 (!BN_to_felem(y_out, &p->
Y)) ||
1953 (!BN_to_felem(z_out, &p->
Z)))
goto err;
1954 felem_shrink(pre_comp[i][1][0], x_out);
1955 felem_shrink(pre_comp[i][1][1], y_out);
1956 felem_shrink(pre_comp[i][1][2], z_out);
1957 for (j = 2; j <= 16; ++j)
1962 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1963 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1964 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1969 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1970 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1976 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1980 if ((scalar != NULL) && (have_pre_comp))
1982 memset(g_secret, 0,
sizeof(g_secret));
1997 flip_endian(g_secret, tmp, num_bytes);
1999 batch_mul(x_out, y_out, z_out,
2002 mixed, (
const smallfelem (*)[17][3]) pre_comp,
2007 batch_mul(x_out, y_out, z_out,
2009 NULL, mixed, (
const smallfelem (*)[17][3]) pre_comp, NULL);
2011 felem_contract(x_in, x_out);
2012 felem_contract(y_in, y_out);
2013 felem_contract(z_in, z_out);
2014 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2015 (!smallfelem_to_BN(z, z_in)))
2024 if (generator != NULL)
2026 if (new_ctx != NULL)
2028 if (secrets != NULL)
2030 if (pre_comp != NULL)
2032 if (tmp_smallfelems != NULL)
2046 felem x_tmp, y_tmp, z_tmp;
2050 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2052 if ((ctx = new_ctx =
BN_CTX_new()) == NULL)
return 0;
2060 if (generator == NULL)
2066 if ((pre = nistp256_pre_comp_new()) == NULL)
2075 if ((!BN_to_felem(x_tmp, &group->
generator->
X)) ||
2076 (!BN_to_felem(y_tmp, &group->
generator->
Y)) ||
2079 felem_shrink(pre->
g_pre_comp[0][1][0], x_tmp);
2080 felem_shrink(pre->
g_pre_comp[0][1][1], y_tmp);
2081 felem_shrink(pre->
g_pre_comp[0][1][2], z_tmp);
2085 for (i = 1; i <= 8; i <<= 1)
2090 for (j = 0; j < 31; ++j)
2101 for (j = 0; j < 31; ++j)
2108 for (i = 0; i < 2; i++)
2133 for (j = 1; j < 8; ++j)
2142 make_points_affine(31, &(pre->
g_pre_comp[0][1]), tmp_smallfelems);
2145 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2151 if (generator != NULL)
2153 if (new_ctx != NULL)
2156 nistp256_pre_comp_free(pre);
2163 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
/*
 * Self-referential placeholder object.
 * NOTE(review): presumably present so the translation unit is not empty
 * when the implementation is compiled out -- confirm against the enclosing
 * preprocessor conditionals, which are not visible here.
 */
static void *dummy = (void *)&dummy;