1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package elliptic
16
17 import (
18 "math/big"
19 "sync"
20 )
21
22 type (
23 p256Curve struct {
24 *CurveParams
25 }
26
27 p256Point struct {
28 xyz [12]uint64
29 }
30 )
31
var (
	// p256 is the package-level curve value; initP256 fills in its
	// parameters.
	p256 p256Curve
	// p256Precomputed is the table allocated and filled by initTable:
	// 37 rows of 64 entries, each entry being 8 limbs (an X and a Y
	// coordinate of 4 limbs each). It is nil until initTable has run.
	p256Precomputed *[37][64 * 8]uint64
	// precomputeOnce makes p256BaseMult run initTable exactly once.
	precomputeOnce sync.Once
)
37
38 func initP256() {
39
40 p256.CurveParams = &CurveParams{Name: "P-256"}
41 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
42 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
43 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
44 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
45 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
46 p256.BitSize = 256
47 }
48
49 func (curve p256Curve) Params() *CurveParams {
50 return curve.CurveParams
51 }
52
53
54
55
// The declarations below have no Go body, so their implementations live
// outside this file (bodyless Go functions are normally provided in
// assembly). Their exact semantics are not visible here: every note is
// inferred from the function name and from how this file calls it, and
// should be confirmed against the implementation.

// p256Mul: presumably res = in1 * in2 in the Montgomery domain modulo the
// field prime. Callers in this file pass 4-limb slices and frequently alias
// res with one of the inputs.
func p256Mul(res, in1, in2 []uint64)

// p256Sqr: presumably res = in² in the Montgomery domain modulo the field
// prime. Callers in this file alias res and in.
func p256Sqr(res, in []uint64)

// p256FromMont: presumably converts in out of the Montgomery domain into
// res; p256PointToAffine calls it in place on each coordinate before
// serialising.
func p256FromMont(res, in []uint64)

// p256NegCond: presumably negates val (mod the field prime) when cond is 1
// and leaves it unchanged when cond is 0. This file calls it on a Y
// coordinate with the sign returned by the Booth recoding.
func p256NegCond(val []uint64, cond int)

// p256MovCond: presumably sets res to a when cond is non-zero and to b when
// cond is zero (consistent with its use in p256ScalarMult) — confirm.
func p256MovCond(res, a, b []uint64, cond int)

// p256BigToLittle: presumably converts a 32-byte big-endian value into four
// little-endian 64-bit limbs. Not called in the visible part of the file.
func p256BigToLittle(res []uint64, in []byte)

// p256LittleToBig: presumably converts four little-endian 64-bit limbs into
// 32 big-endian bytes; callers feed the result to big.Int.SetBytes.
func p256LittleToBig(res []byte, in []uint64)

// p256Select: presumably copies entry idx of table into point.
// p256ScalarMult passes a 12-limb point, a 16-entry table of 12-limb points
// and a Booth digit as idx. How idx == 0 is handled, and whether the
// lookup is constant-time, must be confirmed against the implementation.
func p256Select(point, table []uint64, idx int)

// p256SelectBase: like p256Select, but called by p256BaseMult with an
// 8-limb (X, Y) destination and one row of p256Precomputed.
func p256SelectBase(point, table []uint64, idx int)

// p256OrdMul: presumably res = in1 * in2 in the Montgomery domain modulo
// the group order N; used by Inverse.
func p256OrdMul(res, in1, in2 []uint64)

// p256OrdSqr: presumably squares in n times in the Montgomery domain modulo
// the group order N, storing the result in res. Inverse calls it with
// n = 1, 4, 8, 16, 32 and 64.
func p256OrdSqr(res, in []uint64, n int)





// p256PointAddAffineAsm: presumably adds the affine point in2 (8 limbs) to
// the 12-limb point in1 and stores the sum in res. p256BaseMult passes the
// Booth sign as sign, the Booth digit as sel, and the OR of all previously
// seen digits as zero; how these flags modify the addition is not visible
// in this file.
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)





// p256PointAddAsm: presumably res = in1 + in2 on 12-limb points.
// CombinedMult treats the returned int as a flag that the two inputs were
// the same point (it then substitutes the doubled point).
func p256PointAddAsm(res, in1, in2 []uint64) int



// p256PointDoubleAsm: presumably res = 2 * in on 12-limb points; callers
// in this file alias res and in.
func p256PointDoubleAsm(res, in []uint64)
112
113 func (curve p256Curve) Inverse(k *big.Int) *big.Int {
114 if k.Sign() < 0 {
115
116 k = new(big.Int).Neg(k)
117 }
118
119 if k.Cmp(p256.N) >= 0 {
120
121 k = new(big.Int).Mod(k, p256.N)
122 }
123
124
125
126 var table [4 * 15]uint64
127
128 x := make([]uint64, 4)
129 fromBig(x[:], k)
130
131
132
133
134
135
136 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
137 p256OrdMul(table[:4], x, RR)
138
139
140
141 for i := 2; i < 16; i += 2 {
142 p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
143 p256OrdMul(table[4*i:], table[4*(i-1):], table[:4])
144 }
145
146 x[0] = table[4*14+0]
147 x[1] = table[4*14+1]
148 x[2] = table[4*14+2]
149 x[3] = table[4*14+3]
150
151 p256OrdSqr(x, x, 4)
152 p256OrdMul(x, x, table[4*14:4*14+4])
153 t := make([]uint64, 4, 4)
154 t[0] = x[0]
155 t[1] = x[1]
156 t[2] = x[2]
157 t[3] = x[3]
158
159 p256OrdSqr(x, x, 8)
160 p256OrdMul(x, x, t)
161 t[0] = x[0]
162 t[1] = x[1]
163 t[2] = x[2]
164 t[3] = x[3]
165
166 p256OrdSqr(x, x, 16)
167 p256OrdMul(x, x, t)
168 t[0] = x[0]
169 t[1] = x[1]
170 t[2] = x[2]
171 t[3] = x[3]
172
173 p256OrdSqr(x, x, 64)
174 p256OrdMul(x, x, t)
175 p256OrdSqr(x, x, 32)
176 p256OrdMul(x, x, t)
177
178
179 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
180 for i := 0; i < 32; i++ {
181 p256OrdSqr(x, x, 4)
182 p256OrdMul(x, x, table[4*(expLo[i]-1):])
183 }
184
185
186
187 one := []uint64{1, 0, 0, 0}
188 p256OrdMul(x, x, one)
189
190 xOut := make([]byte, 32)
191 p256LittleToBig(xOut, x)
192 return new(big.Int).SetBytes(xOut)
193 }
194
195
// fromBig writes the magnitude of big into out as little-endian limbs and
// zeroes any limbs of out beyond those big provides. The sign of big is
// ignored. It panics (index out of range) if big has more words than out has
// elements.
//
// NOTE(review): each big.Word is widened to one uint64 limb, so the limb
// layout is only as intended where big.Word is 64 bits wide — confirm the
// file is restricted to 64-bit targets.
func fromBig(out []uint64, big *big.Int) {
	words := big.Bits()

	n := 0
	for ; n < len(words); n++ {
		out[n] = uint64(words[n])
	}
	for ; n < len(out); n++ {
		out[n] = 0
	}
}
205
206
207
208
209 func p256GetScalar(out []uint64, in []byte) {
210 n := new(big.Int).SetBytes(in)
211
212 if n.Cmp(p256.N) >= 0 {
213 n.Mod(n, p256.N)
214 }
215 fromBig(out, n)
216 }
217
218
219
220
// rr is the constant that CombinedMult and ScalarMult multiply coordinates
// by (with p256Mul) right after loading them from a big.Int. Presumably it
// is R² mod P for R = 2^256, so that the multiplication moves a value into
// the Montgomery domain — confirm against the p256Mul implementation.
var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
222
223 func maybeReduceModP(in *big.Int) *big.Int {
224 if in.Cmp(p256.P) < 0 {
225 return in
226 }
227 return new(big.Int).Mod(in, p256.P)
228 }
229
230 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
231 scalarReversed := make([]uint64, 4)
232 var r1, r2 p256Point
233 p256GetScalar(scalarReversed, baseScalar)
234 r1IsInfinity := scalarIsZero(scalarReversed)
235 r1.p256BaseMult(scalarReversed)
236
237 p256GetScalar(scalarReversed, scalar)
238 r2IsInfinity := scalarIsZero(scalarReversed)
239 fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
240 fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
241 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
242 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
243
244
245 r2.xyz[8] = 0x0000000000000001
246 r2.xyz[9] = 0xffffffff00000000
247 r2.xyz[10] = 0xffffffffffffffff
248 r2.xyz[11] = 0x00000000fffffffe
249
250 r2.p256ScalarMult(scalarReversed)
251
252 var sum, double p256Point
253 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
254 p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
255 sum.CopyConditional(&double, pointsEqual)
256 sum.CopyConditional(&r1, r2IsInfinity)
257 sum.CopyConditional(&r2, r1IsInfinity)
258
259 return sum.p256PointToAffine()
260 }
261
262 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
263 scalarReversed := make([]uint64, 4)
264 p256GetScalar(scalarReversed, scalar)
265
266 var r p256Point
267 r.p256BaseMult(scalarReversed)
268 return r.p256PointToAffine()
269 }
270
271 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
272 scalarReversed := make([]uint64, 4)
273 p256GetScalar(scalarReversed, scalar)
274
275 var r p256Point
276 fromBig(r.xyz[0:4], maybeReduceModP(bigX))
277 fromBig(r.xyz[4:8], maybeReduceModP(bigY))
278 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
279 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
280
281 r.xyz[8] = 0x0000000000000001
282 r.xyz[9] = 0xffffffff00000000
283 r.xyz[10] = 0xffffffffffffffff
284 r.xyz[11] = 0x00000000fffffffe
285
286 r.p256ScalarMult(scalarReversed)
287 return r.p256PointToAffine()
288 }
289
290
// uint64IsZero returns 1 if x is zero and 0 otherwise, without branching on
// the value of x.
func uint64IsZero(x uint64) int {
	// After inversion x is all ones exactly when the input was zero.
	// Repeatedly AND-folding the upper half onto the lower half leaves
	// bit 0 set only if every bit was set.
	x = ^x
	for shift := uint(32); shift > 0; shift >>= 1 {
		x &= x >> shift
	}
	return int(x & 1)
}
301
302
303
304 func scalarIsZero(scalar []uint64) int {
305 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
306 }
307
308 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
309 zInv := make([]uint64, 4)
310 zInvSq := make([]uint64, 4)
311 p256Inverse(zInv, p.xyz[8:12])
312 p256Sqr(zInvSq, zInv)
313 p256Mul(zInv, zInv, zInvSq)
314
315 p256Mul(zInvSq, p.xyz[0:4], zInvSq)
316 p256Mul(zInv, p.xyz[4:8], zInv)
317
318 p256FromMont(zInvSq, zInvSq)
319 p256FromMont(zInv, zInv)
320
321 xOut := make([]byte, 32)
322 yOut := make([]byte, 32)
323 p256LittleToBig(xOut, zInvSq)
324 p256LittleToBig(yOut, zInv)
325
326 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
327 }
328
329
330
331 func (p *p256Point) CopyConditional(src *p256Point, v int) {
332 pMask := uint64(v) - 1
333 srcMask := ^pMask
334
335 for i, n := range p.xyz {
336 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
337 }
338 }
339
340
341 func p256Inverse(out, in []uint64) {
342 var stack [6 * 4]uint64
343 p2 := stack[4*0 : 4*0+4]
344 p4 := stack[4*1 : 4*1+4]
345 p8 := stack[4*2 : 4*2+4]
346 p16 := stack[4*3 : 4*3+4]
347 p32 := stack[4*4 : 4*4+4]
348
349 p256Sqr(out, in)
350 p256Mul(p2, out, in)
351
352 p256Sqr(out, p2)
353 p256Sqr(out, out)
354 p256Mul(p4, out, p2)
355
356 p256Sqr(out, p4)
357 p256Sqr(out, out)
358 p256Sqr(out, out)
359 p256Sqr(out, out)
360 p256Mul(p8, out, p4)
361
362 p256Sqr(out, p8)
363
364 for i := 0; i < 7; i++ {
365 p256Sqr(out, out)
366 }
367 p256Mul(p16, out, p8)
368
369 p256Sqr(out, p16)
370 for i := 0; i < 15; i++ {
371 p256Sqr(out, out)
372 }
373 p256Mul(p32, out, p16)
374
375 p256Sqr(out, p32)
376
377 for i := 0; i < 31; i++ {
378 p256Sqr(out, out)
379 }
380 p256Mul(out, out, in)
381
382 for i := 0; i < 32*4; i++ {
383 p256Sqr(out, out)
384 }
385 p256Mul(out, out, p32)
386
387 for i := 0; i < 32; i++ {
388 p256Sqr(out, out)
389 }
390 p256Mul(out, out, p32)
391
392 for i := 0; i < 16; i++ {
393 p256Sqr(out, out)
394 }
395 p256Mul(out, out, p16)
396
397 for i := 0; i < 8; i++ {
398 p256Sqr(out, out)
399 }
400 p256Mul(out, out, p8)
401
402 p256Sqr(out, out)
403 p256Sqr(out, out)
404 p256Sqr(out, out)
405 p256Sqr(out, out)
406 p256Mul(out, out, p4)
407
408 p256Sqr(out, out)
409 p256Sqr(out, out)
410 p256Mul(out, out, p2)
411
412 p256Sqr(out, out)
413 p256Sqr(out, out)
414 p256Mul(out, out, in)
415 }
416
417 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
418 copy(r[index*12:], p.xyz[:])
419 }
420
// boothW5 recodes a 6-bit window (five scalar bits plus the top bit of the
// window below) into a signed Booth digit. It returns the digit's magnitude
// (0..16 for in < 64) and a sign flag that is 1 when the digit is negative.
// No branches depend on in.
func boothW5(in uint) (int, int) {
	// For in < 64, negMask is all ones when bit 5 of in is set and zero
	// otherwise.
	negMask := ^((in >> 5) - 1)

	// Select 63-in for negative digits and in itself otherwise.
	flipped := (1<<6 - 1) - in
	digit := in ^ ((in ^ flipped) & negMask)

	// Halve, rounding up.
	digit = (digit >> 1) + (digit & 1)
	return int(digit), int(negMask & 1)
}
428
// boothW7 recodes an 8-bit window (seven scalar bits plus the top bit of the
// window below) into a signed Booth digit. It returns the digit's magnitude
// (0..64 for in < 256) and a sign flag that is 1 when the digit is negative.
// No branches depend on in.
func boothW7(in uint) (int, int) {
	// For in < 256, negMask is all ones when bit 7 of in is set and zero
	// otherwise.
	negMask := ^((in >> 7) - 1)

	// Select 255-in for negative digits and in itself otherwise.
	flipped := (1<<8 - 1) - in
	digit := in ^ ((in ^ flipped) & negMask)

	// Halve, rounding up.
	digit = (digit >> 1) + (digit & 1)
	return int(digit), int(negMask & 1)
}
436
437 func initTable() {
438 p256Precomputed = new([37][64 * 8]uint64)
439
440 basePoint := []uint64{
441 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
442 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
443 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
444 }
445 t1 := make([]uint64, 12)
446 t2 := make([]uint64, 12)
447 copy(t2, basePoint)
448
449 zInv := make([]uint64, 4)
450 zInvSq := make([]uint64, 4)
451 for j := 0; j < 64; j++ {
452 copy(t1, t2)
453 for i := 0; i < 37; i++ {
454
455 if i != 0 {
456 for k := 0; k < 7; k++ {
457 p256PointDoubleAsm(t1, t1)
458 }
459 }
460
461
462 p256Inverse(zInv, t1[8:12])
463 p256Sqr(zInvSq, zInv)
464 p256Mul(zInv, zInv, zInvSq)
465
466 p256Mul(t1[:4], t1[:4], zInvSq)
467 p256Mul(t1[4:8], t1[4:8], zInv)
468
469 copy(t1[8:12], basePoint[8:12])
470
471 copy(p256Precomputed[i][j*8:], t1[:8])
472 }
473 if j == 0 {
474 p256PointDoubleAsm(t2, basePoint)
475 } else {
476 p256PointAddAsm(t2, t2, basePoint)
477 }
478 }
479 }
480
481 func (p *p256Point) p256BaseMult(scalar []uint64) {
482 precomputeOnce.Do(initTable)
483
484 wvalue := (scalar[0] << 1) & 0xff
485 sel, sign := boothW7(uint(wvalue))
486 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
487 p256NegCond(p.xyz[4:8], sign)
488
489
490 p.xyz[8] = 0x0000000000000001
491 p.xyz[9] = 0xffffffff00000000
492 p.xyz[10] = 0xffffffffffffffff
493 p.xyz[11] = 0x00000000fffffffe
494
495 var t0 p256Point
496
497 t0.xyz[8] = 0x0000000000000001
498 t0.xyz[9] = 0xffffffff00000000
499 t0.xyz[10] = 0xffffffffffffffff
500 t0.xyz[11] = 0x00000000fffffffe
501
502 index := uint(6)
503 zero := sel
504
505 for i := 1; i < 37; i++ {
506 if index < 192 {
507 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff
508 } else {
509 wvalue = (scalar[index/64] >> (index % 64)) & 0xff
510 }
511 index += 7
512 sel, sign = boothW7(uint(wvalue))
513 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
514 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
515 zero |= sel
516 }
517 }
518
519 func (p *p256Point) p256ScalarMult(scalar []uint64) {
520
521
522 var precomp [16 * 4 * 3]uint64
523 var t0, t1, t2, t3 p256Point
524
525
526 p.p256StorePoint(&precomp, 0)
527
528 p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
529 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
530 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
531 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
532 t0.p256StorePoint(&precomp, 1)
533 t1.p256StorePoint(&precomp, 3)
534 t2.p256StorePoint(&precomp, 7)
535 t3.p256StorePoint(&precomp, 15)
536
537 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
538 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
539 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
540 t0.p256StorePoint(&precomp, 2)
541 t1.p256StorePoint(&precomp, 4)
542 t2.p256StorePoint(&precomp, 8)
543
544 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
545 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
546 t0.p256StorePoint(&precomp, 5)
547 t1.p256StorePoint(&precomp, 9)
548
549 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
550 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
551 t2.p256StorePoint(&precomp, 6)
552 t1.p256StorePoint(&precomp, 10)
553
554 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
555 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
556 t0.p256StorePoint(&precomp, 11)
557 t2.p256StorePoint(&precomp, 13)
558
559 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
560 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
561 t0.p256StorePoint(&precomp, 12)
562 t2.p256StorePoint(&precomp, 14)
563
564
565 index := uint(254)
566 var sel, sign int
567
568 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
569 sel, _ = boothW5(uint(wvalue))
570
571 p256Select(p.xyz[0:12], precomp[0:], sel)
572 zero := sel
573
574 for index > 4 {
575 index -= 5
576 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
577 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
578 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
579 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
580 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
581
582 if index < 192 {
583 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
584 } else {
585 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
586 }
587
588 sel, sign = boothW5(uint(wvalue))
589
590 p256Select(t0.xyz[0:], precomp[0:], sel)
591 p256NegCond(t0.xyz[4:8], sign)
592 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
593 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
594 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
595 zero |= sel
596 }
597
598 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
599 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
600 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
601 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
602 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
603
604 wvalue = (scalar[0] << 1) & 0x3f
605 sel, sign = boothW5(uint(wvalue))
606
607 p256Select(t0.xyz[0:], precomp[0:], sel)
608 p256NegCond(t0.xyz[4:8], sign)
609 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
610 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
611 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
612 }
613
View as plain text