00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017 #include "cpu.h"
00018
00019 #include <iostream>
00020
00021 #if _MSC_VER >= 1400
00022 #include <intrin.h>
00023 #endif
00024
00025 #ifdef __DECCXX
00026 #include <c_asm.h>
00027 #endif
00028
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034
00035 NAMESPACE_BEGIN(CryptoPP)
00036
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039 if (valueType != typeid(Integer))
00040 return false;
00041 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042 return true;
00043 }
00044
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047 while (N--)
00048 if (A[N] > B[N])
00049 return 1;
00050 else if (A[N] < B[N])
00051 return -1;
00052
00053 return 0;
00054 }
00055
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058 assert(N);
00059 word t = A[0];
00060 A[0] = t+B;
00061 if (A[0] >= t)
00062 return 0;
00063 for (unsigned i=1; i<N; i++)
00064 if (++A[i])
00065 return 0;
00066 return 1;
00067 }
00068
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071 assert(N);
00072 word t = A[0];
00073 A[0] = t-B;
00074 if (A[0] <= t)
00075 return 0;
00076 for (unsigned i=1; i<N; i++)
00077 if (A[i]--)
00078 return 0;
00079 return 1;
00080 }
00081
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084 Decrement(A, N);
00085 for (unsigned i=0; i<N; i++)
00086 A[i] = ~A[i];
00087 }
00088
00089 static word AtomicInverseModPower2(word A)
00090 {
00091 assert(A%2==1);
00092
00093 word R=A%8;
00094
00095 for (unsigned i=3; i<WORD_BITS; i*=2)
00096 R = R*(2-R*A);
00097
00098 assert(R*A==1);
00099 return R;
00100 }
00101
00102
00103
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
00105 #define Declare2Words(x) word x##0, x##1;
00106 #define AssignWord(a, b) a##0 = b; a##1 = 0;
00107 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108 #define LowWord(a) a##0
00109 #define HighWord(a) a##1
00110 #ifdef _MSC_VER
00111 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
00112 #ifndef __INTEL_COMPILER
00113 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00114 #endif
00115 #elif defined(__DECCXX)
00116 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
00117 #elif defined(__x86_64__)
00118 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
00119
00120 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
00121 #else
00122 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00123 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00124 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00125 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00126 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00127 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00128 #endif
00129 #endif
00130 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
00131 #ifndef Double3Words
00132 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00133 #endif
00134 #ifndef Acc2WordsBy2
00135 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00136 #endif
00137 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00138 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00139 #define GetCarry(u) u##1
00140 #define GetBorrow(u) u##1
00141 #else
00142 #define Declare2Words(x) dword x;
00143 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00144 #define MultiplyWords(p, a, b) p = __emulu(a, b);
00145 #else
00146 #define MultiplyWords(p, a, b) p = (dword)a*b;
00147 #endif
00148 #define AssignWord(a, b) a = b;
00149 #define Add2WordsBy1(a, b, c) a = b + c;
00150 #define Acc2WordsBy2(a, b) a += b;
00151 #define LowWord(a) word(a)
00152 #define HighWord(a) word(a>>WORD_BITS)
00153 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00154 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
00155 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
00156 #define GetCarry(u) HighWord(u)
00157 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
00158 #endif
00159 #ifndef MulAcc
00160 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00161 #endif
00162 #ifndef Acc2WordsBy1
00163 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
00164 #endif
00165 #ifndef Acc3WordsBy2
00166 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00167 #endif
00168
00169 class DWord
00170 {
00171 public:
00172 DWord() {}
00173
00174 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00175 explicit DWord(word low)
00176 {
00177 m_whole = low;
00178 }
00179 #else
00180 explicit DWord(word low)
00181 {
00182 m_halfs.low = low;
00183 m_halfs.high = 0;
00184 }
00185 #endif
00186
00187 DWord(word low, word high)
00188 {
00189 m_halfs.low = low;
00190 m_halfs.high = high;
00191 }
00192
00193 static DWord Multiply(word a, word b)
00194 {
00195 DWord r;
00196 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00197 r.m_whole = (dword)a * b;
00198 #elif defined(MultiplyWordsLoHi)
00199 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
00200 #endif
00201 return r;
00202 }
00203
00204 static DWord MultiplyAndAdd(word a, word b, word c)
00205 {
00206 DWord r = Multiply(a, b);
00207 return r += c;
00208 }
00209
00210 DWord & operator+=(word a)
00211 {
00212 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00213 m_whole = m_whole + a;
00214 #else
00215 m_halfs.low += a;
00216 m_halfs.high += (m_halfs.low < a);
00217 #endif
00218 return *this;
00219 }
00220
00221 DWord operator+(word a)
00222 {
00223 DWord r;
00224 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00225 r.m_whole = m_whole + a;
00226 #else
00227 r.m_halfs.low = m_halfs.low + a;
00228 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00229 #endif
00230 return r;
00231 }
00232
00233 DWord operator-(DWord a)
00234 {
00235 DWord r;
00236 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00237 r.m_whole = m_whole - a.m_whole;
00238 #else
00239 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00240 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00241 #endif
00242 return r;
00243 }
00244
00245 DWord operator-(word a)
00246 {
00247 DWord r;
00248 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00249 r.m_whole = m_whole - a;
00250 #else
00251 r.m_halfs.low = m_halfs.low - a;
00252 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00253 #endif
00254 return r;
00255 }
00256
00257
00258 word operator/(word divisor);
00259
00260 word operator%(word a);
00261
00262 bool operator!() const
00263 {
00264 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00265 return !m_whole;
00266 #else
00267 return !m_halfs.high && !m_halfs.low;
00268 #endif
00269 }
00270
00271 word GetLowHalf() const {return m_halfs.low;}
00272 word GetHighHalf() const {return m_halfs.high;}
00273 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00274
00275 private:
00276 union
00277 {
00278 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279 dword m_whole;
00280 #endif
00281 struct
00282 {
00283 #ifdef IS_LITTLE_ENDIAN
00284 word low;
00285 word high;
00286 #else
00287 word high;
00288 word low;
00289 #endif
00290 } m_halfs;
00291 };
00292 };
00293
00294 class Word
00295 {
00296 public:
00297 Word() {}
00298
00299 Word(word value)
00300 {
00301 m_whole = value;
00302 }
00303
00304 Word(hword low, hword high)
00305 {
00306 m_whole = low | (word(high) << (WORD_BITS/2));
00307 }
00308
00309 static Word Multiply(hword a, hword b)
00310 {
00311 Word r;
00312 r.m_whole = (word)a * b;
00313 return r;
00314 }
00315
00316 Word operator-(Word a)
00317 {
00318 Word r;
00319 r.m_whole = m_whole - a.m_whole;
00320 return r;
00321 }
00322
00323 Word operator-(hword a)
00324 {
00325 Word r;
00326 r.m_whole = m_whole - a;
00327 return r;
00328 }
00329
00330
00331 hword operator/(hword divisor)
00332 {
00333 return hword(m_whole / divisor);
00334 }
00335
00336 bool operator!() const
00337 {
00338 return !m_whole;
00339 }
00340
00341 word GetWhole() const {return m_whole;}
00342 hword GetLowHalf() const {return hword(m_whole);}
00343 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00344 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00345
00346 private:
00347 word m_whole;
00348 };
00349
00350
00351 template <class S, class D>
00352 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00353 {
00354
00355 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00356
00357
00358 S Q;
00359 if (S(B1+1) == 0)
00360 Q = A[2];
00361 else if (B1 > 0)
00362 Q = D(A[1], A[2]) / S(B1+1);
00363 else
00364 Q = D(A[0], A[1]) / B0;
00365
00366
00367 D p = D::Multiply(B0, Q);
00368 D u = (D) A[0] - p.GetLowHalf();
00369 A[0] = u.GetLowHalf();
00370 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00371 A[1] = u.GetLowHalf();
00372 A[2] += u.GetHighHalf();
00373
00374
00375 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00376 {
00377 u = (D) A[0] - B0;
00378 A[0] = u.GetLowHalf();
00379 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00380 A[1] = u.GetLowHalf();
00381 A[2] += u.GetHighHalf();
00382 Q++;
00383 assert(Q);
00384 }
00385
00386 return Q;
00387 }
00388
00389
00390 template <class S, class D>
00391 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00392 {
00393 if (!B)
00394 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00395 else
00396 {
00397 S Q[2];
00398 T[0] = Al.GetLowHalf();
00399 T[1] = Al.GetHighHalf();
00400 T[2] = Ah.GetLowHalf();
00401 T[3] = Ah.GetHighHalf();
00402 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00403 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00404 return D(Q[0], Q[1]);
00405 }
00406 }
00407
00408
00409 inline word DWord::operator/(word a)
00410 {
00411 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00412 return word(m_whole / a);
00413 #else
00414 hword r[4];
00415 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00416 #endif
00417 }
00418
00419 inline word DWord::operator%(word a)
00420 {
00421 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00422 return word(m_whole % a);
00423 #else
00424 if (a < (word(1) << (WORD_BITS/2)))
00425 {
00426 hword h = hword(a);
00427 word r = m_halfs.high % h;
00428 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00429 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00430 }
00431 else
00432 {
00433 hword r[4];
00434 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00435 return Word(r[0], r[1]).GetWhole();
00436 }
00437 #endif
00438 }
00439
00440
00441
00442
00443 #if defined(__GNUC__)
00444 #define AddPrologue \
00445 int result; \
00446 __asm__ __volatile__ \
00447 ( \
00448 ".intel_syntax noprefix;"
00449 #define AddEpilogue \
00450 ".att_syntax prefix;" \
00451 : "=a" (result)\
00452 : "d" (C), "a" (A), "D" (B), "c" (N) \
00453 : "%esi", "memory", "cc" \
00454 );\
00455 return result;
00456 #define MulPrologue \
00457 __asm__ __volatile__ \
00458 ( \
00459 ".intel_syntax noprefix;" \
00460 AS1( push ebx) \
00461 AS2( mov ebx, edx)
00462 #define MulEpilogue \
00463 AS1( pop ebx) \
00464 ".att_syntax prefix;" \
00465 : \
00466 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00467 : "%esi", "memory", "cc" \
00468 );
00469 #define SquPrologue MulPrologue
00470 #define SquEpilogue \
00471 AS1( pop ebx) \
00472 ".att_syntax prefix;" \
00473 : \
00474 : "d" (s_maskLow16), "c" (C), "a" (A) \
00475 : "%esi", "%edi", "memory", "cc" \
00476 );
00477 #define TopPrologue MulPrologue
00478 #define TopEpilogue \
00479 AS1( pop ebx) \
00480 ".att_syntax prefix;" \
00481 : \
00482 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00483 : "memory", "cc" \
00484 );
00485 #else
00486 #define AddPrologue \
00487 __asm push edi \
00488 __asm push esi \
00489 __asm mov eax, [esp+12] \
00490 __asm mov edi, [esp+16]
00491 #define AddEpilogue \
00492 __asm pop esi \
00493 __asm pop edi \
00494 __asm ret 8
00495 #if _MSC_VER < 1300
00496 #define SaveEBX __asm push ebx
00497 #define RestoreEBX __asm pop ebx
00498 #else
00499 #define SaveEBX
00500 #define RestoreEBX
00501 #endif
00502 #define SquPrologue \
00503 AS2( mov eax, A) \
00504 AS2( mov ecx, C) \
00505 SaveEBX \
00506 AS2( lea ebx, s_maskLow16)
00507 #define MulPrologue \
00508 AS2( mov eax, A) \
00509 AS2( mov edi, B) \
00510 AS2( mov ecx, C) \
00511 SaveEBX \
00512 AS2( lea ebx, s_maskLow16)
00513 #define TopPrologue \
00514 AS2( mov eax, A) \
00515 AS2( mov edi, B) \
00516 AS2( mov ecx, C) \
00517 AS2( mov esi, L) \
00518 SaveEBX \
00519 AS2( lea ebx, s_maskLow16)
00520 #define SquEpilogue RestoreEBX
00521 #define MulEpilogue RestoreEBX
00522 #define TopEpilogue RestoreEBX
00523 #endif
00524
00525 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00526 extern "C" {
00527 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00528 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00529 }
00530 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
00531 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00532 {
00533 word result;
00534 __asm__ __volatile__
00535 (
00536 ".intel_syntax;"
00537 AS1( neg %1)
00538 ASJ( jz, 1, f)
00539 AS2( mov %0,[%3+8*%1])
00540 AS2( add %0,[%4+8*%1])
00541 AS2( mov [%2+8*%1],%0)
00542 ASL(0)
00543 AS2( mov %0,[%3+8*%1+8])
00544 AS2( adc %0,[%4+8*%1+8])
00545 AS2( mov [%2+8*%1+8],%0)
00546 AS2( lea %1,[%1+2])
00547 ASJ( jrcxz, 1, f)
00548 AS2( mov %0,[%3+8*%1])
00549 AS2( adc %0,[%4+8*%1])
00550 AS2( mov [%2+8*%1],%0)
00551 ASJ( jmp, 0, b)
00552 ASL(1)
00553 AS2( mov %0, 0)
00554 AS2( adc %0, %0)
00555 ".att_syntax;"
00556 : "=&r" (result), "+c" (N)
00557 : "r" (C+N), "r" (A+N), "r" (B+N)
00558 : "memory", "cc"
00559 );
00560 return (int)result;
00561 }
00562
00563 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00564 {
00565 word result;
00566 __asm__ __volatile__
00567 (
00568 ".intel_syntax;"
00569 AS1( neg %1)
00570 ASJ( jz, 1, f)
00571 AS2( mov %0,[%3+8*%1])
00572 AS2( sub %0,[%4+8*%1])
00573 AS2( mov [%2+8*%1],%0)
00574 ASL(0)
00575 AS2( mov %0,[%3+8*%1+8])
00576 AS2( sbb %0,[%4+8*%1+8])
00577 AS2( mov [%2+8*%1+8],%0)
00578 AS2( lea %1,[%1+2])
00579 ASJ( jrcxz, 1, f)
00580 AS2( mov %0,[%3+8*%1])
00581 AS2( sbb %0,[%4+8*%1])
00582 AS2( mov [%2+8*%1],%0)
00583 ASJ( jmp, 0, b)
00584 ASL(1)
00585 AS2( mov %0, 0)
00586 AS2( adc %0, %0)
00587 ".att_syntax;"
00588 : "=&r" (result), "+c" (N)
00589 : "r" (C+N), "r" (A+N), "r" (B+N)
00590 : "memory", "cc"
00591 );
00592 return (int)result;
00593 }
00594 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00595 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00596 {
00597 AddPrologue
00598
00599
00600 AS2( lea eax, [eax+4*ecx])
00601 AS2( lea edi, [edi+4*ecx])
00602 AS2( lea edx, [edx+4*ecx])
00603
00604 AS1( neg ecx)
00605 AS2( test ecx, 2)
00606 ASJ( jz, 0, f)
00607 AS2( sub ecx, 2)
00608 ASJ( jmp, 1, f)
00609
00610 ASL(0)
00611 ASJ( jecxz, 2, f)
00612 AS2( mov esi,[eax+4*ecx])
00613 AS2( adc esi,[edi+4*ecx])
00614 AS2( mov [edx+4*ecx],esi)
00615 AS2( mov esi,[eax+4*ecx+4])
00616 AS2( adc esi,[edi+4*ecx+4])
00617 AS2( mov [edx+4*ecx+4],esi)
00618 ASL(1)
00619 AS2( mov esi,[eax+4*ecx+8])
00620 AS2( adc esi,[edi+4*ecx+8])
00621 AS2( mov [edx+4*ecx+8],esi)
00622 AS2( mov esi,[eax+4*ecx+12])
00623 AS2( adc esi,[edi+4*ecx+12])
00624 AS2( mov [edx+4*ecx+12],esi)
00625
00626 AS2( lea ecx,[ecx+4])
00627 ASJ( jmp, 0, b)
00628
00629 ASL(2)
00630 AS2( mov eax, 0)
00631 AS1( setc al)
00632
00633 AddEpilogue
00634 }
00635
00636 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00637 {
00638 AddPrologue
00639
00640
00641 AS2( lea eax, [eax+4*ecx])
00642 AS2( lea edi, [edi+4*ecx])
00643 AS2( lea edx, [edx+4*ecx])
00644
00645 AS1( neg ecx)
00646 AS2( test ecx, 2)
00647 ASJ( jz, 0, f)
00648 AS2( sub ecx, 2)
00649 ASJ( jmp, 1, f)
00650
00651 ASL(0)
00652 ASJ( jecxz, 2, f)
00653 AS2( mov esi,[eax+4*ecx])
00654 AS2( sbb esi,[edi+4*ecx])
00655 AS2( mov [edx+4*ecx],esi)
00656 AS2( mov esi,[eax+4*ecx+4])
00657 AS2( sbb esi,[edi+4*ecx+4])
00658 AS2( mov [edx+4*ecx+4],esi)
00659 ASL(1)
00660 AS2( mov esi,[eax+4*ecx+8])
00661 AS2( sbb esi,[edi+4*ecx+8])
00662 AS2( mov [edx+4*ecx+8],esi)
00663 AS2( mov esi,[eax+4*ecx+12])
00664 AS2( sbb esi,[edi+4*ecx+12])
00665 AS2( mov [edx+4*ecx+12],esi)
00666
00667 AS2( lea ecx,[ecx+4])
00668 ASJ( jmp, 0, b)
00669
00670 ASL(2)
00671 AS2( mov eax, 0)
00672 AS1( setc al)
00673
00674 AddEpilogue
00675 }
00676
00677 #if CRYPTOPP_INTEGER_SSE2
00678 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00679 {
00680 AddPrologue
00681
00682
00683 AS2( lea eax, [eax+4*ecx])
00684 AS2( lea edi, [edi+4*ecx])
00685 AS2( lea edx, [edx+4*ecx])
00686
00687 AS1( neg ecx)
00688 AS2( pxor mm2, mm2)
00689 ASJ( jz, 2, f)
00690 AS2( test ecx, 2)
00691 ASJ( jz, 0, f)
00692 AS2( sub ecx, 2)
00693 ASJ( jmp, 1, f)
00694
00695 ASL(0)
00696 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00697 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00698 AS2( paddq mm0, mm1)
00699 AS2( paddq mm2, mm0)
00700 AS2( movd DWORD PTR [edx+4*ecx], mm2)
00701 AS2( psrlq mm2, 32)
00702
00703 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
00704 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00705 AS2( paddq mm0, mm1)
00706 AS2( paddq mm2, mm0)
00707 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00708 AS2( psrlq mm2, 32)
00709
00710 ASL(1)
00711 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00712 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00713 AS2( paddq mm0, mm1)
00714 AS2( paddq mm2, mm0)
00715 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
00716 AS2( psrlq mm2, 32)
00717
00718 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
00719 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00720 AS2( paddq mm0, mm1)
00721 AS2( paddq mm2, mm0)
00722 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00723 AS2( psrlq mm2, 32)
00724
00725 AS2( add ecx, 4)
00726 ASJ( jnz, 0, b)
00727
00728 ASL(2)
00729 AS2( movd eax, mm2)
00730 AS1( emms)
00731
00732 AddEpilogue
00733 }
00734 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00735 {
00736 AddPrologue
00737
00738
00739 AS2( lea eax, [eax+4*ecx])
00740 AS2( lea edi, [edi+4*ecx])
00741 AS2( lea edx, [edx+4*ecx])
00742
00743 AS1( neg ecx)
00744 AS2( pxor mm2, mm2)
00745 ASJ( jz, 2, f)
00746 AS2( test ecx, 2)
00747 ASJ( jz, 0, f)
00748 AS2( sub ecx, 2)
00749 ASJ( jmp, 1, f)
00750
00751 ASL(0)
00752 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00753 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00754 AS2( psubq mm0, mm1)
00755 AS2( psubq mm0, mm2)
00756 AS2( movd DWORD PTR [edx+4*ecx], mm0)
00757 AS2( psrlq mm0, 63)
00758
00759 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
00760 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00761 AS2( psubq mm2, mm1)
00762 AS2( psubq mm2, mm0)
00763 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00764 AS2( psrlq mm2, 63)
00765
00766 ASL(1)
00767 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00768 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00769 AS2( psubq mm0, mm1)
00770 AS2( psubq mm0, mm2)
00771 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
00772 AS2( psrlq mm0, 63)
00773
00774 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
00775 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00776 AS2( psubq mm2, mm1)
00777 AS2( psubq mm2, mm0)
00778 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00779 AS2( psrlq mm2, 63)
00780
00781 AS2( add ecx, 4)
00782 ASJ( jnz, 0, b)
00783
00784 ASL(2)
00785 AS2( movd eax, mm2)
00786 AS1( emms)
00787
00788 AddEpilogue
00789 }
00790 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00791 #else
00792 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00793 {
00794 assert (N%2 == 0);
00795
00796 Declare2Words(u);
00797 AssignWord(u, 0);
00798 for (size_t i=0; i<N; i+=2)
00799 {
00800 AddWithCarry(u, A[i], B[i]);
00801 C[i] = LowWord(u);
00802 AddWithCarry(u, A[i+1], B[i+1]);
00803 C[i+1] = LowWord(u);
00804 }
00805 return int(GetCarry(u));
00806 }
00807
00808 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00809 {
00810 assert (N%2 == 0);
00811
00812 Declare2Words(u);
00813 AssignWord(u, 0);
00814 for (size_t i=0; i<N; i+=2)
00815 {
00816 SubtractWithBorrow(u, A[i], B[i]);
00817 C[i] = LowWord(u);
00818 SubtractWithBorrow(u, A[i+1], B[i+1]);
00819 C[i+1] = LowWord(u);
00820 }
00821 return int(GetBorrow(u));
00822 }
00823 #endif
00824
00825 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00826 {
00827 word carry=0;
00828 for(unsigned i=0; i<N; i++)
00829 {
00830 Declare2Words(p);
00831 MultiplyWords(p, A[i], B);
00832 Acc2WordsBy1(p, carry);
00833 C[i] = LowWord(p);
00834 carry = HighWord(p);
00835 }
00836 return carry;
00837 }
00838
00839 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00840
00841 #define Mul_2 \
00842 Mul_Begin(2) \
00843 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00844 Mul_End(1, 1)
00845
00846 #define Mul_4 \
00847 Mul_Begin(4) \
00848 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00849 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00850 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00851 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
00852 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00853 Mul_End(5, 3)
00854
00855 #define Mul_8 \
00856 Mul_Begin(8) \
00857 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00858 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00859 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00860 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00861 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00862 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00863 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00864 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00865 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00866 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00867 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00868 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00869 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00870 Mul_End(13, 7)
00871
00872 #define Mul_16 \
00873 Mul_Begin(16) \
00874 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00875 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00876 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00877 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00878 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00879 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00880 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00881 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00882 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00883 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00884 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00885 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00886 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00887 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00888 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00889 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00890 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00891 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00892 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00893 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00894 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00895 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00896 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00897 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00898 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00899 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00900 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00901 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00902 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00903 Mul_End(29, 15)
00904
00905 #define Squ_2 \
00906 Squ_Begin(2) \
00907 Squ_End(2)
00908
00909 #define Squ_4 \
00910 Squ_Begin(4) \
00911 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00912 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00913 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00914 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00915 Squ_End(4)
00916
00917 #define Squ_8 \
00918 Squ_Begin(8) \
00919 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00920 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00921 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00922 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00923 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00924 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00925 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00926 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00927 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00928 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00929 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00930 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00931 Squ_End(8)
00932
00933 #define Squ_16 \
00934 Squ_Begin(16) \
00935 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00936 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00937 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00938 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00939 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00940 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00941 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00942 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00943 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00944 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00945 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00946 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00947 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00948 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00949 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00950 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00951 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00952 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00953 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00954 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00955 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00956 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00957 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00958 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00959 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00960 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00961 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00962 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00963 Squ_End(16)
00964
00965 #define Bot_2 \
00966 Mul_Begin(2) \
00967 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00968 Bot_End(2)
00969
00970 #define Bot_4 \
00971 Mul_Begin(4) \
00972 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00973 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
00974 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
00975 Bot_End(4)
00976
00977 #define Bot_8 \
00978 Mul_Begin(8) \
00979 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00980 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00981 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00982 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00983 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00984 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00985 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00986 Bot_End(8)
00987
00988 #define Bot_16 \
00989 Mul_Begin(16) \
00990 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00991 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00992 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00993 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00994 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00995 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00996 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00997 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00998 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00999 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
01000 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
01001 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
01002 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
01003 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
01004 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
01005 Bot_End(16)
01006
01007 #endif
01008
01009 #if 0
01010 #define Mul_Begin(n) \
01011 Declare2Words(p) \
01012 Declare2Words(c) \
01013 Declare2Words(d) \
01014 MultiplyWords(p, A[0], B[0]) \
01015 AssignWord(c, LowWord(p)) \
01016 AssignWord(d, HighWord(p))
01017
01018 #define Mul_Acc(i, j) \
01019 MultiplyWords(p, A[i], B[j]) \
01020 Acc2WordsBy1(c, LowWord(p)) \
01021 Acc2WordsBy1(d, HighWord(p))
01022
01023 #define Mul_SaveAcc(k, i, j) \
01024 R[k] = LowWord(c); \
01025 Add2WordsBy1(c, d, HighWord(c)) \
01026 MultiplyWords(p, A[i], B[j]) \
01027 AssignWord(d, HighWord(p)) \
01028 Acc2WordsBy1(c, LowWord(p))
01029
01030 #define Mul_End(n) \
01031 R[2*n-3] = LowWord(c); \
01032 Acc2WordsBy1(d, HighWord(c)) \
01033 MultiplyWords(p, A[n-1], B[n-1])\
01034 Acc2WordsBy2(d, p) \
01035 R[2*n-2] = LowWord(d); \
01036 R[2*n-1] = HighWord(d);
01037
01038 #define Bot_SaveAcc(k, i, j) \
01039 R[k] = LowWord(c); \
01040 word e = LowWord(d) + HighWord(c); \
01041 e += A[i] * B[j];
01042
01043 #define Bot_Acc(i, j) \
01044 e += A[i] * B[j];
01045
01046 #define Bot_End(n) \
01047 R[n-1] = e;
01048 #else
01049 #define Mul_Begin(n) \
01050 Declare2Words(p) \
01051 word c; \
01052 Declare2Words(d) \
01053 MultiplyWords(p, A[0], B[0]) \
01054 c = LowWord(p); \
01055 AssignWord(d, HighWord(p))
01056
01057 #define Mul_Acc(i, j) \
01058 MulAcc(c, d, A[i], B[j])
01059
01060 #define Mul_SaveAcc(k, i, j) \
01061 R[k] = c; \
01062 c = LowWord(d); \
01063 AssignWord(d, HighWord(d)) \
01064 MulAcc(c, d, A[i], B[j])
01065
01066 #define Mul_End(k, i) \
01067 R[k] = c; \
01068 MultiplyWords(p, A[i], B[i]) \
01069 Acc2WordsBy2(p, d) \
01070 R[k+1] = LowWord(p); \
01071 R[k+2] = HighWord(p);
01072
01073 #define Bot_SaveAcc(k, i, j) \
01074 R[k] = c; \
01075 c = LowWord(d); \
01076 c += A[i] * B[j];
01077
01078 #define Bot_Acc(i, j) \
01079 c += A[i] * B[j];
01080
01081 #define Bot_End(n) \
01082 R[n-1] = c;
01083 #endif
01084
01085 #define Squ_Begin(n) \
01086 Declare2Words(p) \
01087 word c; \
01088 Declare2Words(d) \
01089 Declare2Words(e) \
01090 MultiplyWords(p, A[0], A[0]) \
01091 R[0] = LowWord(p); \
01092 AssignWord(e, HighWord(p)) \
01093 MultiplyWords(p, A[0], A[1]) \
01094 c = LowWord(p); \
01095 AssignWord(d, HighWord(p)) \
01096 Squ_NonDiag \
01097
01098 #define Squ_NonDiag \
01099 Double3Words(c, d)
01100
01101 #define Squ_SaveAcc(k, i, j) \
01102 Acc3WordsBy2(c, d, e) \
01103 R[k] = c; \
01104 MultiplyWords(p, A[i], A[j]) \
01105 c = LowWord(p); \
01106 AssignWord(d, HighWord(p)) \
01107
01108 #define Squ_Acc(i, j) \
01109 MulAcc(c, d, A[i], A[j])
01110
01111 #define Squ_Diag(i) \
01112 Squ_NonDiag \
01113 MulAcc(c, d, A[i], A[i])
01114
01115 #define Squ_End(n) \
01116 Acc3WordsBy2(c, d, e) \
01117 R[2*n-3] = c; \
01118 MultiplyWords(p, A[n-1], A[n-1])\
01119 Acc2WordsBy2(p, e) \
01120 R[2*n-2] = LowWord(p); \
01121 R[2*n-1] = HighWord(p);
01122
01123 void Baseline_Multiply2(word *R, const word *A, const word *B)
01124 {
01125 Mul_2
01126 }
01127
01128 void Baseline_Multiply4(word *R, const word *A, const word *B)
01129 {
01130 Mul_4
01131 }
01132
01133 void Baseline_Multiply8(word *R, const word *A, const word *B)
01134 {
01135 Mul_8
01136 }
01137
01138 void Baseline_Square2(word *R, const word *A)
01139 {
01140 Squ_2
01141 }
01142
01143 void Baseline_Square4(word *R, const word *A)
01144 {
01145 Squ_4
01146 }
01147
01148 void Baseline_Square8(word *R, const word *A)
01149 {
01150 Squ_8
01151 }
01152
01153 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01154 {
01155 Bot_2
01156 }
01157
01158 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01159 {
01160 Bot_4
01161 }
01162
01163 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01164 {
01165 Bot_8
01166 }
01167
01168 #define Top_Begin(n) \
01169 Declare2Words(p) \
01170 word c; \
01171 Declare2Words(d) \
01172 MultiplyWords(p, A[0], B[n-2]);\
01173 AssignWord(d, HighWord(p));
01174
01175 #define Top_Acc(i, j) \
01176 MultiplyWords(p, A[i], B[j]);\
01177 Acc2WordsBy1(d, HighWord(p));
01178
01179 #define Top_SaveAcc0(i, j) \
01180 c = LowWord(d); \
01181 AssignWord(d, HighWord(d)) \
01182 MulAcc(c, d, A[i], B[j])
01183
01184 #define Top_SaveAcc1(i, j) \
01185 c = L<c; \
01186 Acc2WordsBy1(d, c); \
01187 c = LowWord(d); \
01188 AssignWord(d, HighWord(d)) \
01189 MulAcc(c, d, A[i], B[j])
01190
01191 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01192 {
01193 word T[4];
01194 Baseline_Multiply2(T, A, B);
01195 R[0] = T[2];
01196 R[1] = T[3];
01197 }
01198
01199 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01200 {
01201 Top_Begin(4)
01202 Top_Acc(1, 1) Top_Acc(2, 0) \
01203 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01204 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
01205 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01206 Mul_End(1, 3)
01207 }
01208
01209 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01210 {
01211 Top_Begin(8)
01212 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01213 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01214 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01215 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01216 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01217 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01218 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01219 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01220 Mul_End(5, 7)
01221 }
01222
01223 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
01224 void Baseline_Multiply16(word *R, const word *A, const word *B)
01225 {
01226 Mul_16
01227 }
01228
01229 void Baseline_Square16(word *R, const word *A)
01230 {
01231 Squ_16
01232 }
01233
01234 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01235 {
01236 Bot_16
01237 }
01238
01239 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01240 {
01241 Top_Begin(16)
01242 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01243 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01244 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01245 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01246 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01247 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01248 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01249 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01250 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01251 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01252 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01253 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01254 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01255 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01256 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01257 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01258 Mul_End(13, 15)
01259 }
01260 #endif
01261
01262
01263
01264 #if CRYPTOPP_INTEGER_SSE2
01265
01266 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01267
01268 #undef Mul_Begin
01269 #undef Mul_Acc
01270 #undef Top_Begin
01271 #undef Top_Acc
01272 #undef Squ_Acc
01273 #undef Squ_NonDiag
01274 #undef Squ_Diag
01275 #undef Squ_SaveAcc
01276 #undef Squ_Begin
01277 #undef Mul_SaveAcc
01278 #undef Bot_Acc
01279 #undef Bot_SaveAcc
01280 #undef Bot_End
01281 #undef Squ_End
01282 #undef Mul_End
01283
01284 #define SSE2_FinalSave(k) \
01285 AS2( psllq xmm5, 16) \
01286 AS2( paddq xmm4, xmm5) \
01287 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
01288
01289 #define SSE2_SaveShift(k) \
01290 AS2( movq xmm0, xmm6) \
01291 AS2( punpckhqdq xmm6, xmm0) \
01292 AS2( movq xmm1, xmm7) \
01293 AS2( punpckhqdq xmm7, xmm1) \
01294 AS2( paddd xmm6, xmm0) \
01295 AS2( pslldq xmm6, 4) \
01296 AS2( paddd xmm7, xmm1) \
01297 AS2( paddd xmm4, xmm6) \
01298 AS2( pslldq xmm7, 4) \
01299 AS2( movq xmm6, xmm4) \
01300 AS2( paddd xmm5, xmm7) \
01301 AS2( movq xmm7, xmm5) \
01302 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01303 AS2( psrlq xmm6, 16) \
01304 AS2( paddq xmm6, xmm7) \
01305 AS2( punpckhqdq xmm4, xmm0) \
01306 AS2( punpckhqdq xmm5, xmm0) \
01307 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
01308 AS2( psrlq xmm6, 3*16) \
01309 AS2( paddd xmm4, xmm6) \
01310
01311 #define Squ_SSE2_SaveShift(k) \
01312 AS2( movq xmm0, xmm6) \
01313 AS2( punpckhqdq xmm6, xmm0) \
01314 AS2( movq xmm1, xmm7) \
01315 AS2( punpckhqdq xmm7, xmm1) \
01316 AS2( paddd xmm6, xmm0) \
01317 AS2( pslldq xmm6, 4) \
01318 AS2( paddd xmm7, xmm1) \
01319 AS2( paddd xmm4, xmm6) \
01320 AS2( pslldq xmm7, 4) \
01321 AS2( movhlps xmm6, xmm4) \
01322 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01323 AS2( paddd xmm5, xmm7) \
01324 AS2( movhps QWORD PTR [esp+12], xmm5)\
01325 AS2( psrlq xmm4, 16) \
01326 AS2( paddq xmm4, xmm5) \
01327 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
01328 AS2( psrlq xmm4, 3*16) \
01329 AS2( paddd xmm4, xmm6) \
01330 AS2( movq QWORD PTR [esp+4], xmm4)\
01331
01332 #define SSE2_FirstMultiply(i) \
01333 AS2( movdqa xmm7, [esi+(i)*16])\
01334 AS2( movdqa xmm5, [edi-(i)*16])\
01335 AS2( pmuludq xmm5, xmm7) \
01336 AS2( movdqa xmm4, [ebx])\
01337 AS2( movdqa xmm6, xmm4) \
01338 AS2( pand xmm4, xmm5) \
01339 AS2( psrld xmm5, 16) \
01340 AS2( pmuludq xmm7, [edx-(i)*16])\
01341 AS2( pand xmm6, xmm7) \
01342 AS2( psrld xmm7, 16)
01343
01344 #define Squ_Begin(n) \
01345 SquPrologue \
01346 AS2( mov esi, esp)\
01347 AS2( and esp, 0xfffffff0)\
01348 AS2( lea edi, [esp-32*n])\
01349 AS2( sub esp, 32*n+16)\
01350 AS1( push esi)\
01351 AS2( mov esi, edi) \
01352 AS2( xor edx, edx) \
01353 ASL(1) \
01354 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01355 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01356 AS2( movdqa [edi+2*edx], xmm0) \
01357 AS2( psrlq xmm0, 32) \
01358 AS2( movdqa [edi+2*edx+16], xmm0) \
01359 AS2( movdqa [edi+16*n+2*edx], xmm1) \
01360 AS2( psrlq xmm1, 32) \
01361 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
01362 AS2( add edx, 16) \
01363 AS2( cmp edx, 8*(n)) \
01364 ASJ( jne, 1, b) \
01365 AS2( lea edx, [edi+16*n])\
01366 SSE2_FirstMultiply(0) \
01367
01368 #define Squ_Acc(i) \
01369 ASL(LSqu##i) \
01370 AS2( movdqa xmm1, [esi+(i)*16]) \
01371 AS2( movdqa xmm0, [edi-(i)*16]) \
01372 AS2( movdqa xmm2, [ebx]) \
01373 AS2( pmuludq xmm0, xmm1) \
01374 AS2( pmuludq xmm1, [edx-(i)*16]) \
01375 AS2( movdqa xmm3, xmm2) \
01376 AS2( pand xmm2, xmm0) \
01377 AS2( psrld xmm0, 16) \
01378 AS2( paddd xmm4, xmm2) \
01379 AS2( paddd xmm5, xmm0) \
01380 AS2( pand xmm3, xmm1) \
01381 AS2( psrld xmm1, 16) \
01382 AS2( paddd xmm6, xmm3) \
01383 AS2( paddd xmm7, xmm1) \
01384
01385 #define Squ_Acc1(i)
01386 #define Squ_Acc2(i) ASC(call, LSqu##i)
01387 #define Squ_Acc3(i) Squ_Acc2(i)
01388 #define Squ_Acc4(i) Squ_Acc2(i)
01389 #define Squ_Acc5(i) Squ_Acc2(i)
01390 #define Squ_Acc6(i) Squ_Acc2(i)
01391 #define Squ_Acc7(i) Squ_Acc2(i)
01392 #define Squ_Acc8(i) Squ_Acc2(i)
01393
01394 #define SSE2_End(E, n) \
01395 SSE2_SaveShift(2*(n)-3) \
01396 AS2( movdqa xmm7, [esi+16]) \
01397 AS2( movdqa xmm0, [edi]) \
01398 AS2( pmuludq xmm0, xmm7) \
01399 AS2( movdqa xmm2, [ebx]) \
01400 AS2( pmuludq xmm7, [edx]) \
01401 AS2( movdqa xmm6, xmm2) \
01402 AS2( pand xmm2, xmm0) \
01403 AS2( psrld xmm0, 16) \
01404 AS2( paddd xmm4, xmm2) \
01405 AS2( paddd xmm5, xmm0) \
01406 AS2( pand xmm6, xmm7) \
01407 AS2( psrld xmm7, 16) \
01408 SSE2_SaveShift(2*(n)-2) \
01409 SSE2_FinalSave(2*(n)-1) \
01410 AS1( pop esp)\
01411 E
01412
01413 #define Squ_End(n) SSE2_End(SquEpilogue, n)
01414 #define Mul_End(n) SSE2_End(MulEpilogue, n)
01415 #define Top_End(n) SSE2_End(TopEpilogue, n)
01416
01417 #define Squ_Column1(k, i) \
01418 Squ_SSE2_SaveShift(k) \
01419 AS2( add esi, 16) \
01420 SSE2_FirstMultiply(1)\
01421 Squ_Acc##i(i) \
01422 AS2( paddd xmm4, xmm4) \
01423 AS2( paddd xmm5, xmm5) \
01424 AS2( movdqa xmm3, [esi]) \
01425 AS2( movq xmm1, QWORD PTR [esi+8]) \
01426 AS2( pmuludq xmm1, xmm3) \
01427 AS2( pmuludq xmm3, xmm3) \
01428 AS2( movdqa xmm0, [ebx])\
01429 AS2( movdqa xmm2, xmm0) \
01430 AS2( pand xmm0, xmm1) \
01431 AS2( psrld xmm1, 16) \
01432 AS2( paddd xmm6, xmm0) \
01433 AS2( paddd xmm7, xmm1) \
01434 AS2( pand xmm2, xmm3) \
01435 AS2( psrld xmm3, 16) \
01436 AS2( paddd xmm6, xmm6) \
01437 AS2( paddd xmm7, xmm7) \
01438 AS2( paddd xmm4, xmm2) \
01439 AS2( paddd xmm5, xmm3) \
01440 AS2( movq xmm0, QWORD PTR [esp+4])\
01441 AS2( movq xmm1, QWORD PTR [esp+12])\
01442 AS2( paddd xmm4, xmm0)\
01443 AS2( paddd xmm5, xmm1)\
01444
01445 #define Squ_Column0(k, i) \
01446 Squ_SSE2_SaveShift(k) \
01447 AS2( add edi, 16) \
01448 AS2( add edx, 16) \
01449 SSE2_FirstMultiply(1)\
01450 Squ_Acc##i(i) \
01451 AS2( paddd xmm6, xmm6) \
01452 AS2( paddd xmm7, xmm7) \
01453 AS2( paddd xmm4, xmm4) \
01454 AS2( paddd xmm5, xmm5) \
01455 AS2( movq xmm0, QWORD PTR [esp+4])\
01456 AS2( movq xmm1, QWORD PTR [esp+12])\
01457 AS2( paddd xmm4, xmm0)\
01458 AS2( paddd xmm5, xmm1)\
01459
01460 #define SSE2_MulAdd45 \
01461 AS2( movdqa xmm7, [esi]) \
01462 AS2( movdqa xmm0, [edi]) \
01463 AS2( pmuludq xmm0, xmm7) \
01464 AS2( movdqa xmm2, [ebx]) \
01465 AS2( pmuludq xmm7, [edx]) \
01466 AS2( movdqa xmm6, xmm2) \
01467 AS2( pand xmm2, xmm0) \
01468 AS2( psrld xmm0, 16) \
01469 AS2( paddd xmm4, xmm2) \
01470 AS2( paddd xmm5, xmm0) \
01471 AS2( pand xmm6, xmm7) \
01472 AS2( psrld xmm7, 16)
01473
01474 #define Mul_Begin(n) \
01475 MulPrologue \
01476 AS2( mov esi, esp)\
01477 AS2( and esp, 0xfffffff0)\
01478 AS2( sub esp, 48*n+16)\
01479 AS1( push esi)\
01480 AS2( xor edx, edx) \
01481 ASL(1) \
01482 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01483 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01484 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01485 AS2( movdqa [esp+20+2*edx], xmm0) \
01486 AS2( psrlq xmm0, 32) \
01487 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01488 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01489 AS2( psrlq xmm1, 32) \
01490 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01491 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01492 AS2( psrlq xmm2, 32) \
01493 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01494 AS2( add edx, 16) \
01495 AS2( cmp edx, 8*(n)) \
01496 ASJ( jne, 1, b) \
01497 AS2( lea edi, [esp+20])\
01498 AS2( lea edx, [esp+20+16*n])\
01499 AS2( lea esi, [esp+20+32*n])\
01500 SSE2_FirstMultiply(0) \
01501
01502 #define Mul_Acc(i) \
01503 ASL(LMul##i) \
01504 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01505 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01506 AS2( movdqa xmm2, [ebx]) \
01507 AS2( pmuludq xmm0, xmm1) \
01508 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01509 AS2( movdqa xmm3, xmm2) \
01510 AS2( pand xmm2, xmm0) \
01511 AS2( psrld xmm0, 16) \
01512 AS2( paddd xmm4, xmm2) \
01513 AS2( paddd xmm5, xmm0) \
01514 AS2( pand xmm3, xmm1) \
01515 AS2( psrld xmm1, 16) \
01516 AS2( paddd xmm6, xmm3) \
01517 AS2( paddd xmm7, xmm1) \
01518
01519 #define Mul_Acc1(i)
01520 #define Mul_Acc2(i) ASC(call, LMul##i)
01521 #define Mul_Acc3(i) Mul_Acc2(i)
01522 #define Mul_Acc4(i) Mul_Acc2(i)
01523 #define Mul_Acc5(i) Mul_Acc2(i)
01524 #define Mul_Acc6(i) Mul_Acc2(i)
01525 #define Mul_Acc7(i) Mul_Acc2(i)
01526 #define Mul_Acc8(i) Mul_Acc2(i)
01527 #define Mul_Acc9(i) Mul_Acc2(i)
01528 #define Mul_Acc10(i) Mul_Acc2(i)
01529 #define Mul_Acc11(i) Mul_Acc2(i)
01530 #define Mul_Acc12(i) Mul_Acc2(i)
01531 #define Mul_Acc13(i) Mul_Acc2(i)
01532 #define Mul_Acc14(i) Mul_Acc2(i)
01533 #define Mul_Acc15(i) Mul_Acc2(i)
01534 #define Mul_Acc16(i) Mul_Acc2(i)
01535
01536 #define Mul_Column1(k, i) \
01537 SSE2_SaveShift(k) \
01538 AS2( add esi, 16) \
01539 SSE2_MulAdd45\
01540 Mul_Acc##i(i) \
01541
01542 #define Mul_Column0(k, i) \
01543 SSE2_SaveShift(k) \
01544 AS2( add edi, 16) \
01545 AS2( add edx, 16) \
01546 SSE2_MulAdd45\
01547 Mul_Acc##i(i) \
01548
01549 #define Bot_Acc(i) \
01550 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01551 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01552 AS2( pmuludq xmm0, xmm1) \
01553 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01554 AS2( paddq xmm4, xmm0) \
01555 AS2( paddd xmm6, xmm1)
01556
01557 #define Bot_SaveAcc(k) \
01558 SSE2_SaveShift(k) \
01559 AS2( add edi, 16) \
01560 AS2( add edx, 16) \
01561 AS2( movdqa xmm6, [esi]) \
01562 AS2( movdqa xmm0, [edi]) \
01563 AS2( pmuludq xmm0, xmm6) \
01564 AS2( paddq xmm4, xmm0) \
01565 AS2( psllq xmm5, 16) \
01566 AS2( paddq xmm4, xmm5) \
01567 AS2( pmuludq xmm6, [edx])
01568
01569 #define Bot_End(n) \
01570 AS2( movhlps xmm7, xmm6) \
01571 AS2( paddd xmm6, xmm7) \
01572 AS2( psllq xmm6, 32) \
01573 AS2( paddd xmm4, xmm6) \
01574 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
01575 AS1( pop esp)\
01576 MulEpilogue
01577
01578 #define Top_Begin(n) \
01579 TopPrologue \
01580 AS2( mov edx, esp)\
01581 AS2( and esp, 0xfffffff0)\
01582 AS2( sub esp, 48*n+16)\
01583 AS1( push edx)\
01584 AS2( xor edx, edx) \
01585 ASL(1) \
01586 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01587 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01588 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01589 AS2( movdqa [esp+20+2*edx], xmm0) \
01590 AS2( psrlq xmm0, 32) \
01591 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01592 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01593 AS2( psrlq xmm1, 32) \
01594 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01595 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01596 AS2( psrlq xmm2, 32) \
01597 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01598 AS2( add edx, 16) \
01599 AS2( cmp edx, 8*(n)) \
01600 ASJ( jne, 1, b) \
01601 AS2( mov eax, esi) \
01602 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
01603 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
01604 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
01605 AS2( pxor xmm4, xmm4)\
01606 AS2( pxor xmm5, xmm5)
01607
01608 #define Top_Acc(i) \
01609 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
01610 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01611 AS2( psrlq xmm0, 48) \
01612 AS2( paddd xmm5, xmm0)\
01613
01614 #define Top_Column0(i) \
01615 AS2( psllq xmm5, 32) \
01616 AS2( add edi, 16) \
01617 AS2( add edx, 16) \
01618 SSE2_MulAdd45\
01619 Mul_Acc##i(i) \
01620
01621 #define Top_Column1(i) \
01622 SSE2_SaveShift(0) \
01623 AS2( add esi, 16) \
01624 SSE2_MulAdd45\
01625 Mul_Acc##i(i) \
01626 AS2( shr eax, 16) \
01627 AS2( movd xmm0, eax)\
01628 AS2( movd xmm1, [ecx+4])\
01629 AS2( psrld xmm1, 16)\
01630 AS2( pcmpgtd xmm1, xmm0)\
01631 AS2( psrld xmm1, 31)\
01632 AS2( paddd xmm4, xmm1)\
01633
01634 void SSE2_Square4(word *C, const word *A)
01635 {
01636 Squ_Begin(2)
01637 Squ_Column0(0, 1)
01638 Squ_End(2)
01639 }
01640
01641 void SSE2_Square8(word *C, const word *A)
01642 {
01643 Squ_Begin(4)
01644 #ifndef __GNUC__
01645 ASJ( jmp, 0, f)
01646 Squ_Acc(2)
01647 AS1( ret) ASL(0)
01648 #endif
01649 Squ_Column0(0, 1)
01650 Squ_Column1(1, 1)
01651 Squ_Column0(2, 2)
01652 Squ_Column1(3, 1)
01653 Squ_Column0(4, 1)
01654 Squ_End(4)
01655 }
01656
01657 void SSE2_Square16(word *C, const word *A)
01658 {
01659 Squ_Begin(8)
01660 #ifndef __GNUC__
01661 ASJ( jmp, 0, f)
01662 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01663 AS1( ret) ASL(0)
01664 #endif
01665 Squ_Column0(0, 1)
01666 Squ_Column1(1, 1)
01667 Squ_Column0(2, 2)
01668 Squ_Column1(3, 2)
01669 Squ_Column0(4, 3)
01670 Squ_Column1(5, 3)
01671 Squ_Column0(6, 4)
01672 Squ_Column1(7, 3)
01673 Squ_Column0(8, 3)
01674 Squ_Column1(9, 2)
01675 Squ_Column0(10, 2)
01676 Squ_Column1(11, 1)
01677 Squ_Column0(12, 1)
01678 Squ_End(8)
01679 }
01680
01681 void SSE2_Square32(word *C, const word *A)
01682 {
01683 Squ_Begin(16)
01684 ASJ( jmp, 0, f)
01685 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01686 AS1( ret) ASL(0)
01687 Squ_Column0(0, 1)
01688 Squ_Column1(1, 1)
01689 Squ_Column0(2, 2)
01690 Squ_Column1(3, 2)
01691 Squ_Column0(4, 3)
01692 Squ_Column1(5, 3)
01693 Squ_Column0(6, 4)
01694 Squ_Column1(7, 4)
01695 Squ_Column0(8, 5)
01696 Squ_Column1(9, 5)
01697 Squ_Column0(10, 6)
01698 Squ_Column1(11, 6)
01699 Squ_Column0(12, 7)
01700 Squ_Column1(13, 7)
01701 Squ_Column0(14, 8)
01702 Squ_Column1(15, 7)
01703 Squ_Column0(16, 7)
01704 Squ_Column1(17, 6)
01705 Squ_Column0(18, 6)
01706 Squ_Column1(19, 5)
01707 Squ_Column0(20, 5)
01708 Squ_Column1(21, 4)
01709 Squ_Column0(22, 4)
01710 Squ_Column1(23, 3)
01711 Squ_Column0(24, 3)
01712 Squ_Column1(25, 2)
01713 Squ_Column0(26, 2)
01714 Squ_Column1(27, 1)
01715 Squ_Column0(28, 1)
01716 Squ_End(16)
01717 }
01718
01719 void SSE2_Multiply4(word *C, const word *A, const word *B)
01720 {
01721 Mul_Begin(2)
01722 #ifndef __GNUC__
01723 ASJ( jmp, 0, f)
01724 Mul_Acc(2)
01725 AS1( ret) ASL(0)
01726 #endif
01727 Mul_Column0(0, 2)
01728 Mul_End(2)
01729 }
01730
01731 void SSE2_Multiply8(word *C, const word *A, const word *B)
01732 {
01733 Mul_Begin(4)
01734 #ifndef __GNUC__
01735 ASJ( jmp, 0, f)
01736 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01737 AS1( ret) ASL(0)
01738 #endif
01739 Mul_Column0(0, 2)
01740 Mul_Column1(1, 3)
01741 Mul_Column0(2, 4)
01742 Mul_Column1(3, 3)
01743 Mul_Column0(4, 2)
01744 Mul_End(4)
01745 }
01746
01747 void SSE2_Multiply16(word *C, const word *A, const word *B)
01748 {
01749 Mul_Begin(8)
01750 #ifndef __GNUC__
01751 ASJ( jmp, 0, f)
01752 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01753 AS1( ret) ASL(0)
01754 #endif
01755 Mul_Column0(0, 2)
01756 Mul_Column1(1, 3)
01757 Mul_Column0(2, 4)
01758 Mul_Column1(3, 5)
01759 Mul_Column0(4, 6)
01760 Mul_Column1(5, 7)
01761 Mul_Column0(6, 8)
01762 Mul_Column1(7, 7)
01763 Mul_Column0(8, 6)
01764 Mul_Column1(9, 5)
01765 Mul_Column0(10, 4)
01766 Mul_Column1(11, 3)
01767 Mul_Column0(12, 2)
01768 Mul_End(8)
01769 }
01770
01771 void SSE2_Multiply32(word *C, const word *A, const word *B)
01772 {
01773 Mul_Begin(16)
01774 ASJ( jmp, 0, f)
01775 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01776 AS1( ret) ASL(0)
01777 Mul_Column0(0, 2)
01778 Mul_Column1(1, 3)
01779 Mul_Column0(2, 4)
01780 Mul_Column1(3, 5)
01781 Mul_Column0(4, 6)
01782 Mul_Column1(5, 7)
01783 Mul_Column0(6, 8)
01784 Mul_Column1(7, 9)
01785 Mul_Column0(8, 10)
01786 Mul_Column1(9, 11)
01787 Mul_Column0(10, 12)
01788 Mul_Column1(11, 13)
01789 Mul_Column0(12, 14)
01790 Mul_Column1(13, 15)
01791 Mul_Column0(14, 16)
01792 Mul_Column1(15, 15)
01793 Mul_Column0(16, 14)
01794 Mul_Column1(17, 13)
01795 Mul_Column0(18, 12)
01796 Mul_Column1(19, 11)
01797 Mul_Column0(20, 10)
01798 Mul_Column1(21, 9)
01799 Mul_Column0(22, 8)
01800 Mul_Column1(23, 7)
01801 Mul_Column0(24, 6)
01802 Mul_Column1(25, 5)
01803 Mul_Column0(26, 4)
01804 Mul_Column1(27, 3)
01805 Mul_Column0(28, 2)
01806 Mul_End(16)
01807 }
01808
01809 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01810 {
01811 Mul_Begin(2)
01812 Bot_SaveAcc(0) Bot_Acc(2)
01813 Bot_End(2)
01814 }
01815
01816 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01817 {
01818 Mul_Begin(4)
01819 #ifndef __GNUC__
01820 ASJ( jmp, 0, f)
01821 Mul_Acc(3) Mul_Acc(2)
01822 AS1( ret) ASL(0)
01823 #endif
01824 Mul_Column0(0, 2)
01825 Mul_Column1(1, 3)
01826 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01827 Bot_End(4)
01828 }
01829
01830 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01831 {
01832 Mul_Begin(8)
01833 #ifndef __GNUC__
01834 ASJ( jmp, 0, f)
01835 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01836 AS1( ret) ASL(0)
01837 #endif
01838 Mul_Column0(0, 2)
01839 Mul_Column1(1, 3)
01840 Mul_Column0(2, 4)
01841 Mul_Column1(3, 5)
01842 Mul_Column0(4, 6)
01843 Mul_Column1(5, 7)
01844 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01845 Bot_End(8)
01846 }
01847
01848 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01849 {
01850 Mul_Begin(16)
01851 #ifndef __GNUC__
01852 ASJ( jmp, 0, f)
01853 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01854 AS1( ret) ASL(0)
01855 #endif
01856 Mul_Column0(0, 2)
01857 Mul_Column1(1, 3)
01858 Mul_Column0(2, 4)
01859 Mul_Column1(3, 5)
01860 Mul_Column0(4, 6)
01861 Mul_Column1(5, 7)
01862 Mul_Column0(6, 8)
01863 Mul_Column1(7, 9)
01864 Mul_Column0(8, 10)
01865 Mul_Column1(9, 11)
01866 Mul_Column0(10, 12)
01867 Mul_Column1(11, 13)
01868 Mul_Column0(12, 14)
01869 Mul_Column1(13, 15)
01870 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01871 Bot_End(16)
01872 }
01873
01874 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01875 {
01876 Top_Begin(4)
01877 Top_Acc(3) Top_Acc(2) Top_Acc(1)
01878 #ifndef __GNUC__
01879 ASJ( jmp, 0, f)
01880 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01881 AS1( ret) ASL(0)
01882 #endif
01883 Top_Column0(4)
01884 Top_Column1(3)
01885 Mul_Column0(0, 2)
01886 Top_End(2)
01887 }
01888
01889 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01890 {
01891 Top_Begin(8)
01892 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01893 #ifndef __GNUC__
01894 ASJ( jmp, 0, f)
01895 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01896 AS1( ret) ASL(0)
01897 #endif
01898 Top_Column0(8)
01899 Top_Column1(7)
01900 Mul_Column0(0, 6)
01901 Mul_Column1(1, 5)
01902 Mul_Column0(2, 4)
01903 Mul_Column1(3, 3)
01904 Mul_Column0(4, 2)
01905 Top_End(4)
01906 }
01907
01908 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01909 {
01910 Top_Begin(16)
01911 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01912 #ifndef __GNUC__
01913 ASJ( jmp, 0, f)
01914 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01915 AS1( ret) ASL(0)
01916 #endif
01917 Top_Column0(16)
01918 Top_Column1(15)
01919 Mul_Column0(0, 14)
01920 Mul_Column1(1, 13)
01921 Mul_Column0(2, 12)
01922 Mul_Column1(3, 11)
01923 Mul_Column0(4, 10)
01924 Mul_Column1(5, 9)
01925 Mul_Column0(6, 8)
01926 Mul_Column1(7, 7)
01927 Mul_Column0(8, 6)
01928 Mul_Column1(9, 5)
01929 Mul_Column0(10, 4)
01930 Mul_Column1(11, 3)
01931 Mul_Column0(12, 2)
01932 Top_End(8)
01933 }
01934
01935 #endif // #if CRYPTOPP_INTEGER_SSE2
01936
01937
01938
01939 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01940 typedef void (* PMul)(word *C, const word *A, const word *B);
01941 typedef void (* PSqu)(word *C, const word *A);
01942 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01943
01944 #if CRYPTOPP_INTEGER_SSE2
01945 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01946 static size_t s_recursionLimit = 8;
01947 #else
01948 static const size_t s_recursionLimit = 16;
01949 #endif
01950
01951 static PMul s_pMul[9], s_pBot[9];
01952 static PSqu s_pSqu[9];
01953 static PMulTop s_pTop[9];
01954
01955 static void SetFunctionPointers()
01956 {
01957 s_pMul[0] = &Baseline_Multiply2;
01958 s_pBot[0] = &Baseline_MultiplyBottom2;
01959 s_pSqu[0] = &Baseline_Square2;
01960 s_pTop[0] = &Baseline_MultiplyTop2;
01961 s_pTop[1] = &Baseline_MultiplyTop4;
01962
01963 #if CRYPTOPP_INTEGER_SSE2
01964 if (HasSSE2())
01965 {
01966 #if _MSC_VER != 1200 || defined(NDEBUG)
01967 if (IsP4())
01968 {
01969 s_pAdd = &SSE2_Add;
01970 s_pSub = &SSE2_Sub;
01971 }
01972 #endif
01973
01974 s_recursionLimit = 32;
01975
01976 s_pMul[1] = &SSE2_Multiply4;
01977 s_pMul[2] = &SSE2_Multiply8;
01978 s_pMul[4] = &SSE2_Multiply16;
01979 s_pMul[8] = &SSE2_Multiply32;
01980
01981 s_pBot[1] = &SSE2_MultiplyBottom4;
01982 s_pBot[2] = &SSE2_MultiplyBottom8;
01983 s_pBot[4] = &SSE2_MultiplyBottom16;
01984 s_pBot[8] = &SSE2_MultiplyBottom32;
01985
01986 s_pSqu[1] = &SSE2_Square4;
01987 s_pSqu[2] = &SSE2_Square8;
01988 s_pSqu[4] = &SSE2_Square16;
01989 s_pSqu[8] = &SSE2_Square32;
01990
01991 s_pTop[2] = &SSE2_MultiplyTop8;
01992 s_pTop[4] = &SSE2_MultiplyTop16;
01993 s_pTop[8] = &SSE2_MultiplyTop32;
01994 }
01995 else
01996 #endif
01997 {
01998 s_pMul[1] = &Baseline_Multiply4;
01999 s_pMul[2] = &Baseline_Multiply8;
02000
02001 s_pBot[1] = &Baseline_MultiplyBottom4;
02002 s_pBot[2] = &Baseline_MultiplyBottom8;
02003
02004 s_pSqu[1] = &Baseline_Square4;
02005 s_pSqu[2] = &Baseline_Square8;
02006
02007 s_pTop[2] = &Baseline_MultiplyTop8;
02008
02009 #if !CRYPTOPP_INTEGER_SSE2
02010 s_pMul[4] = &Baseline_Multiply16;
02011 s_pBot[4] = &Baseline_MultiplyBottom16;
02012 s_pSqu[4] = &Baseline_Square16;
02013 s_pTop[4] = &Baseline_MultiplyTop16;
02014 #endif
02015 }
02016 }
02017
02018 inline int Add(word *C, const word *A, const word *B, size_t N)
02019 {
02020 #if CRYPTOPP_INTEGER_SSE2
02021 return s_pAdd(N, C, A, B);
02022 #else
02023 return Baseline_Add(N, C, A, B);
02024 #endif
02025 }
02026
02027 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02028 {
02029 #if CRYPTOPP_INTEGER_SSE2
02030 return s_pSub(N, C, A, B);
02031 #else
02032 return Baseline_Sub(N, C, A, B);
02033 #endif
02034 }
02035
02036
02037
02038
02039 #define A0 A
02040 #define A1 (A+N2)
02041 #define B0 B
02042 #define B1 (B+N2)
02043
02044 #define T0 T
02045 #define T1 (T+N2)
02046 #define T2 (T+N)
02047 #define T3 (T+N+N2)
02048
02049 #define R0 R
02050 #define R1 (R+N2)
02051 #define R2 (R+N)
02052 #define R3 (R+N+N2)
02053
02054
02055
02056
02057
02058
02059 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02060 {
02061 assert(N>=2 && N%2==0);
02062
02063 if (N <= s_recursionLimit)
02064 s_pMul[N/4](R, A, B);
02065 else
02066 {
02067 const size_t N2 = N/2;
02068
02069 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02070 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02071
02072 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02073 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02074
02075 RecursiveMultiply(R2, T2, A1, B1, N2);
02076 RecursiveMultiply(T0, T2, R0, R1, N2);
02077 RecursiveMultiply(R0, T2, A0, B0, N2);
02078
02079
02080
02081 int c2 = Add(R2, R2, R1, N2);
02082 int c3 = c2;
02083 c2 += Add(R1, R2, R0, N2);
02084 c3 += Add(R2, R2, R3, N2);
02085
02086 if (AN2 == BN2)
02087 c3 -= Subtract(R1, R1, T0, N);
02088 else
02089 c3 += Add(R1, R1, T0, N);
02090
02091 c3 += Increment(R2, N2, c2);
02092 assert (c3 >= 0 && c3 <= 2);
02093 Increment(R3, N2, c3);
02094 }
02095 }
02096
02097
02098
02099
02100
02101 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02102 {
02103 assert(N && N%2==0);
02104
02105 if (N <= s_recursionLimit)
02106 s_pSqu[N/4](R, A);
02107 else
02108 {
02109 const size_t N2 = N/2;
02110
02111 RecursiveSquare(R0, T2, A0, N2);
02112 RecursiveSquare(R2, T2, A1, N2);
02113 RecursiveMultiply(T0, T2, A0, A1, N2);
02114
02115 int carry = Add(R1, R1, T0, N);
02116 carry += Add(R1, R1, T0, N);
02117 Increment(R3, N2, carry);
02118 }
02119 }
02120
02121
02122
02123
02124
02125
02126 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02127 {
02128 assert(N>=2 && N%2==0);
02129
02130 if (N <= s_recursionLimit)
02131 s_pBot[N/4](R, A, B);
02132 else
02133 {
02134 const size_t N2 = N/2;
02135
02136 RecursiveMultiply(R, T, A0, B0, N2);
02137 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02138 Add(R1, R1, T0, N2);
02139 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02140 Add(R1, R1, T0, N2);
02141 }
02142 }
02143
02144
02145
02146
02147
02148
02149
02150 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02151 {
02152 assert(N>=2 && N%2==0);
02153
02154 if (N <= s_recursionLimit)
02155 s_pTop[N/4](R, A, B, L[N-1]);
02156 else
02157 {
02158 const size_t N2 = N/2;
02159
02160 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02161 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02162
02163 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02164 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02165
02166 RecursiveMultiply(T0, T2, R0, R1, N2);
02167 RecursiveMultiply(R0, T2, A1, B1, N2);
02168
02169
02170
02171 int t, c3;
02172 int c2 = Subtract(T2, L+N2, L, N2);
02173
02174 if (AN2 == BN2)
02175 {
02176 c2 -= Add(T2, T2, T0, N2);
02177 t = (Compare(T2, R0, N2) == -1);
02178 c3 = t - Subtract(T2, T2, T1, N2);
02179 }
02180 else
02181 {
02182 c2 += Subtract(T2, T2, T0, N2);
02183 t = (Compare(T2, R0, N2) == -1);
02184 c3 = t + Add(T2, T2, T1, N2);
02185 }
02186
02187 c2 += t;
02188 if (c2 >= 0)
02189 c3 += Increment(T2, N2, c2);
02190 else
02191 c3 -= Decrement(T2, N2, -c2);
02192 c3 += Add(R0, T2, R1, N2);
02193
02194 assert (c3 >= 0 && c3 <= 2);
02195 Increment(R1, N2, c3);
02196 }
02197 }
02198
02199 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02200 {
02201 RecursiveMultiply(R, T, A, B, N);
02202 }
02203
02204 inline void Square(word *R, word *T, const word *A, size_t N)
02205 {
02206 RecursiveSquare(R, T, A, N);
02207 }
02208
02209 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02210 {
02211 RecursiveMultiplyBottom(R, T, A, B, N);
02212 }
02213
02214
02215
02216
02217
02218
02219 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02220 {
02221 if (NA == NB)
02222 {
02223 if (A == B)
02224 Square(R, T, A, NA);
02225 else
02226 Multiply(R, T, A, B, NA);
02227
02228 return;
02229 }
02230
02231 if (NA > NB)
02232 {
02233 std::swap(A, B);
02234 std::swap(NA, NB);
02235 }
02236
02237 assert(NB % NA == 0);
02238
02239 if (NA==2 && !A[1])
02240 {
02241 switch (A[0])
02242 {
02243 case 0:
02244 SetWords(R, 0, NB+2);
02245 return;
02246 case 1:
02247 CopyWords(R, B, NB);
02248 R[NB] = R[NB+1] = 0;
02249 return;
02250 default:
02251 R[NB] = LinearMultiply(R, B, A[0], NB);
02252 R[NB+1] = 0;
02253 return;
02254 }
02255 }
02256
02257 size_t i;
02258 if ((NB/NA)%2 == 0)
02259 {
02260 Multiply(R, T, A, B, NA);
02261 CopyWords(T+2*NA, R+NA, NA);
02262
02263 for (i=2*NA; i<NB; i+=2*NA)
02264 Multiply(T+NA+i, T, A, B+i, NA);
02265 for (i=NA; i<NB; i+=2*NA)
02266 Multiply(R+i, T, A, B+i, NA);
02267 }
02268 else
02269 {
02270 for (i=0; i<NB; i+=2*NA)
02271 Multiply(R+i, T, A, B+i, NA);
02272 for (i=NA; i<NB; i+=2*NA)
02273 Multiply(T+NA+i, T, A, B+i, NA);
02274 }
02275
02276 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02277 Increment(R+NB, NA);
02278 }
02279
02280
02281
02282
02283
02284 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02285 {
02286 if (N==2)
02287 {
02288 T[0] = AtomicInverseModPower2(A[0]);
02289 T[1] = 0;
02290 s_pBot[0](T+2, T, A);
02291 TwosComplement(T+2, 2);
02292 Increment(T+2, 2, 2);
02293 s_pBot[0](R, T, T+2);
02294 }
02295 else
02296 {
02297 const size_t N2 = N/2;
02298 RecursiveInverseModPower2(R0, T0, A0, N2);
02299 T0[0] = 1;
02300 SetWords(T0+1, 0, N2-1);
02301 MultiplyTop(R1, T1, T0, R0, A0, N2);
02302 MultiplyBottom(T0, T1, R0, A1, N2);
02303 Add(T0, R1, T0, N2);
02304 TwosComplement(T0, N2);
02305 MultiplyBottom(R1, T1, R0, T0, N2);
02306 }
02307 }
02308
02309
02310
02311
02312
02313
02314
02315 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02316 {
02317 #if 1
02318 MultiplyBottom(R, T, X, U, N);
02319 MultiplyTop(T, T+N, X, R, M, N);
02320 word borrow = Subtract(T, X+N, T, N);
02321
02322 word carry = Add(T+N, T, M, N);
02323 assert(carry | !borrow);
02324 CopyWords(R, T + ((0-borrow) & N), N);
02325 #elif 0
02326 const word u = 0-U[0];
02327 Declare2Words(p)
02328 for (size_t i=0; i<N; i++)
02329 {
02330 const word t = u * X[i];
02331 word c = 0;
02332 for (size_t j=0; j<N; j+=2)
02333 {
02334 MultiplyWords(p, t, M[j]);
02335 Acc2WordsBy1(p, X[i+j]);
02336 Acc2WordsBy1(p, c);
02337 X[i+j] = LowWord(p);
02338 c = HighWord(p);
02339 MultiplyWords(p, t, M[j+1]);
02340 Acc2WordsBy1(p, X[i+j+1]);
02341 Acc2WordsBy1(p, c);
02342 X[i+j+1] = LowWord(p);
02343 c = HighWord(p);
02344 }
02345
02346 if (Increment(X+N+i, N-i, c))
02347 while (!Subtract(X+N, X+N, M, N)) {}
02348 }
02349
02350 memcpy(R, X+N, N*WORD_SIZE);
02351 #else
02352 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02353 for (size_t i=0; i<N; i++)
02354 {
02355 __m64 t = _mm_cvtsi32_si64(X[i]);
02356 t = _mm_mul_su32(t, u);
02357 __m64 c = _mm_setzero_si64();
02358 for (size_t j=0; j<N; j+=2)
02359 {
02360 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02361 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02362 c = _mm_add_si64(c, p);
02363 X[i+j] = _mm_cvtsi64_si32(c);
02364 c = _mm_srli_si64(c, 32);
02365 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02366 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02367 c = _mm_add_si64(c, p);
02368 X[i+j+1] = _mm_cvtsi64_si32(c);
02369 c = _mm_srli_si64(c, 32);
02370 }
02371
02372 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02373 while (!Subtract(X+N, X+N, M, N)) {}
02374 }
02375
02376 memcpy(R, X+N, N*WORD_SIZE);
02377 _mm_empty();
02378 #endif
02379 }
02380
02381
02382
02383
02384
02385
02386
02387
02388 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02389 {
02390 assert(N%2==0 && N>=4);
02391
02392 #define M0 M
02393 #define M1 (M+N2)
02394 #define V0 V
02395 #define V1 (V+N2)
02396
02397 #define X0 X
02398 #define X1 (X+N2)
02399 #define X2 (X+N)
02400 #define X3 (X+N+N2)
02401
02402 const size_t N2 = N/2;
02403 Multiply(T0, T2, V0, X3, N2);
02404 int c2 = Add(T0, T0, X0, N);
02405 MultiplyBottom(T3, T2, T0, U, N2);
02406 MultiplyTop(T2, R, T0, T3, M0, N2);
02407 c2 -= Subtract(T2, T1, T2, N2);
02408 Multiply(T0, R, T3, M1, N2);
02409 c2 -= Subtract(T0, T2, T0, N2);
02410 int c3 = -(int)Subtract(T1, X2, T1, N2);
02411 Multiply(R0, T2, V1, X3, N2);
02412 c3 += Add(R, R, T, N);
02413
02414 if (c2>0)
02415 c3 += Increment(R1, N2);
02416 else if (c2<0)
02417 c3 -= Decrement(R1, N2, -c2);
02418
02419 assert(c3>=-1 && c3<=1);
02420 if (c3>0)
02421 Subtract(R, R, M, N);
02422 else if (c3<0)
02423 Add(R, R, M, N);
02424
02425 #undef M0
02426 #undef M1
02427 #undef V0
02428 #undef V1
02429
02430 #undef X0
02431 #undef X1
02432 #undef X2
02433 #undef X3
02434 }
02435
02436 #undef A0
02437 #undef A1
02438 #undef B0
02439 #undef B1
02440
02441 #undef T0
02442 #undef T1
02443 #undef T2
02444 #undef T3
02445
02446 #undef R0
02447 #undef R1
02448 #undef R2
02449 #undef R3
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501
02502
02503
02504
02505
02506
02507
02508
02509
02510
02511
02512
02513
02514
02515 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02516 {
02517 word T[4];
02518 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02519 Q[0] = q.GetLowHalf();
02520 Q[1] = q.GetHighHalf();
02521
02522 #ifndef NDEBUG
02523 if (B[0] || B[1])
02524 {
02525
02526 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02527 word P[4];
02528 s_pMul[0](P, Q, B);
02529 Add(P, P, T, 4);
02530 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02531 }
02532 #endif
02533 }
02534
02535
02536 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02537 {
02538 assert(N && N%2==0);
02539
02540 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02541
02542 word borrow = Subtract(R, R, T, N+2);
02543 assert(!borrow && !R[N+1]);
02544
02545 while (R[N] || Compare(R, B, N) >= 0)
02546 {
02547 R[N] -= Subtract(R, R, B, N);
02548 Q[1] += (++Q[0]==0);
02549 assert(Q[0] || Q[1]);
02550 }
02551 }
02552
02553
02554
02555
02556
02557
02558
02559 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02560 {
02561 assert(NA && NB && NA%2==0 && NB%2==0);
02562 assert(B[NB-1] || B[NB-2]);
02563 assert(NB <= NA);
02564
02565
02566 word *const TA=T;
02567 word *const TB=T+NA+2;
02568 word *const TP=T+NA+2+NB;
02569
02570
02571 unsigned shiftWords = (B[NB-1]==0);
02572 TB[0] = TB[NB-1] = 0;
02573 CopyWords(TB+shiftWords, B, NB-shiftWords);
02574 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02575 assert(shiftBits < WORD_BITS);
02576 ShiftWordsLeftByBits(TB, NB, shiftBits);
02577
02578
02579 TA[0] = TA[NA] = TA[NA+1] = 0;
02580 CopyWords(TA+shiftWords, A, NA);
02581 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02582
02583 if (TA[NA+1]==0 && TA[NA] <= 1)
02584 {
02585 Q[NA-NB+1] = Q[NA-NB] = 0;
02586 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02587 {
02588 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02589 ++Q[NA-NB];
02590 }
02591 }
02592 else
02593 {
02594 NA+=2;
02595 assert(Compare(TA+NA-NB, TB, NB) < 0);
02596 }
02597
02598 word BT[2];
02599 BT[0] = TB[NB-2] + 1;
02600 BT[1] = TB[NB-1] + (BT[0]==0);
02601
02602
02603 for (size_t i=NA-2; i>=NB; i-=2)
02604 {
02605 AtomicDivide(Q+i-NB, TA+i-2, BT);
02606 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02607 }
02608
02609
02610 CopyWords(R, TA+shiftWords, NB);
02611 ShiftWordsRightByBits(R, NB, shiftBits);
02612 }
02613
02614 static inline size_t EvenWordCount(const word *X, size_t N)
02615 {
02616 while (N && X[N-2]==0 && X[N-1]==0)
02617 N-=2;
02618 return N;
02619 }
02620
02621
02622
02623
02624
02625
02626
02627 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02628 {
02629 assert(NA<=N && N && N%2==0);
02630
02631 word *b = T;
02632 word *c = T+N;
02633 word *f = T+2*N;
02634 word *g = T+3*N;
02635 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02636 unsigned int k=0;
02637 bool s=false;
02638
02639 SetWords(T, 0, 3*N);
02640 b[0]=1;
02641 CopyWords(f, A, NA);
02642 CopyWords(g, M, N);
02643
02644 while (1)
02645 {
02646 word t=f[0];
02647 while (!t)
02648 {
02649 if (EvenWordCount(f, fgLen)==0)
02650 {
02651 SetWords(R, 0, N);
02652 return 0;
02653 }
02654
02655 ShiftWordsRightByWords(f, fgLen, 1);
02656 bcLen += 2 * (c[bcLen-1] != 0);
02657 assert(bcLen <= N);
02658 ShiftWordsLeftByWords(c, bcLen, 1);
02659 k+=WORD_BITS;
02660 t=f[0];
02661 }
02662
02663 unsigned int i = TrailingZeros(t);
02664 t >>= i;
02665 k += i;
02666
02667 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
02668 {
02669 if (s)
02670 Subtract(R, M, b, N);
02671 else
02672 CopyWords(R, b, N);
02673 return k;
02674 }
02675
02676 ShiftWordsRightByBits(f, fgLen, i);
02677 t = ShiftWordsLeftByBits(c, bcLen, i);
02678 c[bcLen] += t;
02679 bcLen += 2 * (t!=0);
02680 assert(bcLen <= N);
02681
02682 bool swap = Compare(f, g, fgLen)==-1;
02683 ConditionalSwapPointers(swap, f, g);
02684 ConditionalSwapPointers(swap, b, c);
02685 s ^= swap;
02686
02687 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
02688
02689 Subtract(f, f, g, fgLen);
02690 t = Add(b, b, c, bcLen);
02691 b[bcLen] += t;
02692 bcLen += 2*t;
02693 assert(bcLen <= N);
02694 }
02695 }
02696
02697
02698
02699
02700
02701 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02702 {
02703 CopyWords(R, A, N);
02704
02705 while (k--)
02706 {
02707 if (R[0]%2==0)
02708 ShiftWordsRightByBits(R, N, 1);
02709 else
02710 {
02711 word carry = Add(R, R, M, N);
02712 ShiftWordsRightByBits(R, N, 1);
02713 R[N-1] += carry<<(WORD_BITS-1);
02714 }
02715 }
02716 }
02717
02718
02719
02720
02721
02722 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02723 {
02724 CopyWords(R, A, N);
02725
02726 while (k--)
02727 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02728 Subtract(R, R, M, N);
02729 }
02730
02731
02732
02733 InitializeInteger::InitializeInteger()
02734 {
02735 if (!g_pAssignIntToInteger)
02736 {
02737 SetFunctionPointers();
02738 g_pAssignIntToInteger = AssignIntToInteger;
02739 }
02740 }
02741
02742 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02743
02744 static inline size_t RoundupSize(size_t n)
02745 {
02746 if (n<=8)
02747 return RoundupSizeTable[n];
02748 else if (n<=16)
02749 return 16;
02750 else if (n<=32)
02751 return 32;
02752 else if (n<=64)
02753 return 64;
02754 else return size_t(1) << BitPrecision(n-1);
02755 }
02756
02757 Integer::Integer()
02758 : reg(2), sign(POSITIVE)
02759 {
02760 reg[0] = reg[1] = 0;
02761 }
02762
02763 Integer::Integer(const Integer& t)
02764 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02765 {
02766 CopyWords(reg, t.reg, reg.size());
02767 }
02768
02769 Integer::Integer(Sign s, lword value)
02770 : reg(2), sign(s)
02771 {
02772 reg[0] = word(value);
02773 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02774 }
02775
02776 Integer::Integer(signed long value)
02777 : reg(2)
02778 {
02779 if (value >= 0)
02780 sign = POSITIVE;
02781 else
02782 {
02783 sign = NEGATIVE;
02784 value = -value;
02785 }
02786 reg[0] = word(value);
02787 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02788 }
02789
02790 Integer::Integer(Sign s, word high, word low)
02791 : reg(2), sign(s)
02792 {
02793 reg[0] = low;
02794 reg[1] = high;
02795 }
02796
02797 bool Integer::IsConvertableToLong() const
02798 {
02799 if (ByteCount() > sizeof(long))
02800 return false;
02801
02802 unsigned long value = (unsigned long)reg[0];
02803 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02804
02805 if (sign==POSITIVE)
02806 return (signed long)value >= 0;
02807 else
02808 return -(signed long)value < 0;
02809 }
02810
02811 signed long Integer::ConvertToLong() const
02812 {
02813 assert(IsConvertableToLong());
02814
02815 unsigned long value = (unsigned long)reg[0];
02816 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02817 return sign==POSITIVE ? value : -(signed long)value;
02818 }
02819
02820 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02821 {
02822 Decode(encodedInteger, byteCount, s);
02823 }
02824
02825 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02826 {
02827 Decode(encodedInteger, byteCount, s);
02828 }
02829
02830 Integer::Integer(BufferedTransformation &bt)
02831 {
02832 BERDecode(bt);
02833 }
02834
02835 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02836 {
02837 Randomize(rng, bitcount);
02838 }
02839
02840 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02841 {
02842 if (!Randomize(rng, min, max, rnType, equiv, mod))
02843 throw Integer::RandomNumberNotFound();
02844 }
02845
02846 Integer Integer::Power2(size_t e)
02847 {
02848 Integer r((word)0, BitsToWords(e+1));
02849 r.SetBit(e);
02850 return r;
02851 }
02852
02853 template <long i>
02854 struct NewInteger
02855 {
02856 Integer * operator()() const
02857 {
02858 return new Integer(i);
02859 }
02860 };
02861
02862 const Integer &Integer::Zero()
02863 {
02864 return Singleton<Integer>().Ref();
02865 }
02866
02867 const Integer &Integer::One()
02868 {
02869 return Singleton<Integer, NewInteger<1> >().Ref();
02870 }
02871
02872 const Integer &Integer::Two()
02873 {
02874 return Singleton<Integer, NewInteger<2> >().Ref();
02875 }
02876
02877 bool Integer::operator!() const
02878 {
02879 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02880 }
02881
02882 Integer& Integer::operator=(const Integer& t)
02883 {
02884 if (this != &t)
02885 {
02886 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02887 reg.New(RoundupSize(t.WordCount()));
02888 CopyWords(reg, t.reg, reg.size());
02889 sign = t.sign;
02890 }
02891 return *this;
02892 }
02893
02894 bool Integer::GetBit(size_t n) const
02895 {
02896 if (n/WORD_BITS >= reg.size())
02897 return 0;
02898 else
02899 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02900 }
02901
02902 void Integer::SetBit(size_t n, bool value)
02903 {
02904 if (value)
02905 {
02906 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02907 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02908 }
02909 else
02910 {
02911 if (n/WORD_BITS < reg.size())
02912 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02913 }
02914 }
02915
02916 byte Integer::GetByte(size_t n) const
02917 {
02918 if (n/WORD_SIZE >= reg.size())
02919 return 0;
02920 else
02921 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02922 }
02923
02924 void Integer::SetByte(size_t n, byte value)
02925 {
02926 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02927 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02928 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02929 }
02930
02931 lword Integer::GetBits(size_t i, size_t n) const
02932 {
02933 lword v = 0;
02934 assert(n <= sizeof(v)*8);
02935 for (unsigned int j=0; j<n; j++)
02936 v |= lword(GetBit(i+j)) << j;
02937 return v;
02938 }
02939
02940 Integer Integer::operator-() const
02941 {
02942 Integer result(*this);
02943 result.Negate();
02944 return result;
02945 }
02946
02947 Integer Integer::AbsoluteValue() const
02948 {
02949 Integer result(*this);
02950 result.sign = POSITIVE;
02951 return result;
02952 }
02953
02954 void Integer::swap(Integer &a)
02955 {
02956 reg.swap(a.reg);
02957 std::swap(sign, a.sign);
02958 }
02959
02960 Integer::Integer(word value, size_t length)
02961 : reg(RoundupSize(length)), sign(POSITIVE)
02962 {
02963 reg[0] = value;
02964 SetWords(reg+1, 0, reg.size()-1);
02965 }
02966
02967 template <class T>
02968 static Integer StringToInteger(const T *str)
02969 {
02970 int radix;
02971
02972
02973 unsigned int length;
02974 for (length = 0; str[length] != 0; length++) {}
02975
02976 Integer v;
02977
02978 if (length == 0)
02979 return v;
02980
02981 switch (str[length-1])
02982 {
02983 case 'h':
02984 case 'H':
02985 radix=16;
02986 break;
02987 case 'o':
02988 case 'O':
02989 radix=8;
02990 break;
02991 case 'b':
02992 case 'B':
02993 radix=2;
02994 break;
02995 default:
02996 radix=10;
02997 }
02998
02999 if (length > 2 && str[0] == '0' && str[1] == 'x')
03000 radix = 16;
03001
03002 for (unsigned i=0; i<length; i++)
03003 {
03004 int digit;
03005
03006 if (str[i] >= '0' && str[i] <= '9')
03007 digit = str[i] - '0';
03008 else if (str[i] >= 'A' && str[i] <= 'F')
03009 digit = str[i] - 'A' + 10;
03010 else if (str[i] >= 'a' && str[i] <= 'f')
03011 digit = str[i] - 'a' + 10;
03012 else
03013 digit = radix;
03014
03015 if (digit < radix)
03016 {
03017 v *= radix;
03018 v += digit;
03019 }
03020 }
03021
03022 if (str[0] == '-')
03023 v.Negate();
03024
03025 return v;
03026 }
03027
03028 Integer::Integer(const char *str)
03029 : reg(2), sign(POSITIVE)
03030 {
03031 *this = StringToInteger(str);
03032 }
03033
03034 Integer::Integer(const wchar_t *str)
03035 : reg(2), sign(POSITIVE)
03036 {
03037 *this = StringToInteger(str);
03038 }
03039
03040 unsigned int Integer::WordCount() const
03041 {
03042 return (unsigned int)CountWords(reg, reg.size());
03043 }
03044
03045 unsigned int Integer::ByteCount() const
03046 {
03047 unsigned wordCount = WordCount();
03048 if (wordCount)
03049 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03050 else
03051 return 0;
03052 }
03053
03054 unsigned int Integer::BitCount() const
03055 {
03056 unsigned wordCount = WordCount();
03057 if (wordCount)
03058 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03059 else
03060 return 0;
03061 }
03062
03063 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03064 {
03065 StringStore store(input, inputLen);
03066 Decode(store, inputLen, s);
03067 }
03068
03069 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03070 {
03071 assert(bt.MaxRetrievable() >= inputLen);
03072
03073 byte b;
03074 bt.Peek(b);
03075 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03076
03077 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03078 {
03079 bt.Skip(1);
03080 inputLen--;
03081 bt.Peek(b);
03082 }
03083
03084 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03085
03086 for (size_t i=inputLen; i > 0; i--)
03087 {
03088 bt.Get(b);
03089 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03090 }
03091
03092 if (sign == NEGATIVE)
03093 {
03094 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03095 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03096 TwosComplement(reg, reg.size());
03097 }
03098 }
03099
03100 size_t Integer::MinEncodedSize(Signedness signedness) const
03101 {
03102 unsigned int outputLen = STDMAX(1U, ByteCount());
03103 if (signedness == UNSIGNED)
03104 return outputLen;
03105 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03106 outputLen++;
03107 if (IsNegative() && *this < -Power2(outputLen*8-1))
03108 outputLen++;
03109 return outputLen;
03110 }
03111
03112 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03113 {
03114 ArraySink sink(output, outputLen);
03115 Encode(sink, outputLen, signedness);
03116 }
03117
03118 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03119 {
03120 if (signedness == UNSIGNED || NotNegative())
03121 {
03122 for (size_t i=outputLen; i > 0; i--)
03123 bt.Put(GetByte(i-1));
03124 }
03125 else
03126 {
03127
03128 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03129 temp.Encode(bt, outputLen, UNSIGNED);
03130 }
03131 }
03132
03133 void Integer::DEREncode(BufferedTransformation &bt) const
03134 {
03135 DERGeneralEncoder enc(bt, INTEGER);
03136 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03137 enc.MessageEnd();
03138 }
03139
03140 void Integer::BERDecode(const byte *input, size_t len)
03141 {
03142 StringStore store(input, len);
03143 BERDecode(store);
03144 }
03145
03146 void Integer::BERDecode(BufferedTransformation &bt)
03147 {
03148 BERGeneralDecoder dec(bt, INTEGER);
03149 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03150 BERDecodeError();
03151 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03152 dec.MessageEnd();
03153 }
03154
03155 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03156 {
03157 DERGeneralEncoder enc(bt, OCTET_STRING);
03158 Encode(enc, length);
03159 enc.MessageEnd();
03160 }
03161
03162 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03163 {
03164 BERGeneralDecoder dec(bt, OCTET_STRING);
03165 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03166 BERDecodeError();
03167 Decode(dec, length);
03168 dec.MessageEnd();
03169 }
03170
03171 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03172 {
03173 ArraySink sink(output, len);
03174 return OpenPGPEncode(sink);
03175 }
03176
03177 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03178 {
03179 word16 bitCount = BitCount();
03180 bt.PutWord16(bitCount);
03181 size_t byteCount = BitsToBytes(bitCount);
03182 Encode(bt, byteCount);
03183 return 2 + byteCount;
03184 }
03185
03186 void Integer::OpenPGPDecode(const byte *input, size_t len)
03187 {
03188 StringStore store(input, len);
03189 OpenPGPDecode(store);
03190 }
03191
03192 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03193 {
03194 word16 bitCount;
03195 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03196 throw OpenPGPDecodeErr();
03197 Decode(bt, BitsToBytes(bitCount));
03198 }
03199
03200 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03201 {
03202 const size_t nbytes = nbits/8 + 1;
03203 SecByteBlock buf(nbytes);
03204 rng.GenerateBlock(buf, nbytes);
03205 if (nbytes)
03206 buf[0] = (byte)Crop(buf[0], nbits % 8);
03207 Decode(buf, nbytes, UNSIGNED);
03208 }
03209
03210 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03211 {
03212 if (min > max)
03213 throw InvalidArgument("Integer: Min must be no greater than Max");
03214
03215 Integer range = max - min;
03216 const unsigned int nbits = range.BitCount();
03217
03218 do
03219 {
03220 Randomize(rng, nbits);
03221 }
03222 while (*this > range);
03223
03224 *this += min;
03225 }
03226
03227 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03228 {
03229 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03230 }
03231
03232 class KDF2_RNG : public RandomNumberGenerator
03233 {
03234 public:
03235 KDF2_RNG(const byte *seed, size_t seedSize)
03236 : m_counter(0), m_counterAndSeed(seedSize + 4)
03237 {
03238 memcpy(m_counterAndSeed + 4, seed, seedSize);
03239 }
03240
03241 void GenerateBlock(byte *output, size_t size)
03242 {
03243 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03244 ++m_counter;
03245 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03246 }
03247
03248 private:
03249 word32 m_counter;
03250 SecByteBlock m_counterAndSeed;
03251 };
03252
03253 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03254 {
03255 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03256 Integer max;
03257 if (!params.GetValue("Max", max))
03258 {
03259 int bitLength;
03260 if (params.GetIntValue("BitLength", bitLength))
03261 max = Integer::Power2(bitLength);
03262 else
03263 throw InvalidArgument("Integer: missing Max argument");
03264 }
03265 if (min > max)
03266 throw InvalidArgument("Integer: Min must be no greater than Max");
03267
03268 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03269 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03270
03271 if (equiv.IsNegative() || equiv >= mod)
03272 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03273
03274 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03275
03276 member_ptr<KDF2_RNG> kdf2Rng;
03277 ConstByteArrayParameter seed;
03278 if (params.GetValue(Name::Seed(), seed))
03279 {
03280 ByteQueue bq;
03281 DERSequenceEncoder seq(bq);
03282 min.DEREncode(seq);
03283 max.DEREncode(seq);
03284 equiv.DEREncode(seq);
03285 mod.DEREncode(seq);
03286 DEREncodeUnsigned(seq, rnType);
03287 DEREncodeOctetString(seq, seed.begin(), seed.size());
03288 seq.MessageEnd();
03289
03290 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03291 bq.Get(finalSeed, finalSeed.size());
03292 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03293 }
03294 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03295
03296 switch (rnType)
03297 {
03298 case ANY:
03299 if (mod == One())
03300 Randomize(rng, min, max);
03301 else
03302 {
03303 Integer min1 = min + (equiv-min)%mod;
03304 if (max < min1)
03305 return false;
03306 Randomize(rng, Zero(), (max - min1) / mod);
03307 *this *= mod;
03308 *this += min1;
03309 }
03310 return true;
03311
03312 case PRIME:
03313 {
03314 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03315
03316 int i;
03317 i = 0;
03318 while (1)
03319 {
03320 if (++i==16)
03321 {
03322
03323 Integer first = min;
03324 if (FirstPrime(first, max, equiv, mod, pSelector))
03325 {
03326
03327 *this = first;
03328 if (!FirstPrime(first, max, equiv, mod, pSelector))
03329 return true;
03330 }
03331 else
03332 return false;
03333 }
03334
03335 Randomize(rng, min, max);
03336 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03337 return true;
03338 }
03339 }
03340
03341 default:
03342 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03343 }
03344 }
03345
03346 std::istream& operator>>(std::istream& in, Integer &a)
03347 {
03348 char c;
03349 unsigned int length = 0;
03350 SecBlock<char> str(length + 16);
03351
03352 std::ws(in);
03353
03354 do
03355 {
03356 in.read(&c, 1);
03357 str[length++] = c;
03358 if (length >= str.size())
03359 str.Grow(length + 16);
03360 }
03361 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03362
03363 if (in.gcount())
03364 in.putback(c);
03365 str[length-1] = '\0';
03366 a = Integer(str);
03367
03368 return in;
03369 }
03370
03371 std::ostream& operator<<(std::ostream& out, const Integer &a)
03372 {
03373
03374 long f = out.flags() & std::ios::basefield;
03375 int base, block;
03376 char suffix;
03377 switch(f)
03378 {
03379 case std::ios::oct :
03380 base = 8;
03381 block = 8;
03382 suffix = 'o';
03383 break;
03384 case std::ios::hex :
03385 base = 16;
03386 block = 4;
03387 suffix = 'h';
03388 break;
03389 default :
03390 base = 10;
03391 block = 3;
03392 suffix = '.';
03393 }
03394
03395 Integer temp1=a, temp2;
03396
03397 if (a.IsNegative())
03398 {
03399 out << '-';
03400 temp1.Negate();
03401 }
03402
03403 if (!a)
03404 out << '0';
03405
03406 static const char upper[]="0123456789ABCDEF";
03407 static const char lower[]="0123456789abcdef";
03408
03409 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
03410 unsigned i=0;
03411 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03412
03413 while (!!temp1)
03414 {
03415 word digit;
03416 Integer::Divide(digit, temp2, temp1, base);
03417 s[i++]=vec[digit];
03418 temp1.swap(temp2);
03419 }
03420
03421 while (i--)
03422 {
03423 out << s[i];
03424
03425
03426 }
03427 return out << suffix;
03428 }
03429
03430 Integer& Integer::operator++()
03431 {
03432 if (NotNegative())
03433 {
03434 if (Increment(reg, reg.size()))
03435 {
03436 reg.CleanGrow(2*reg.size());
03437 reg[reg.size()/2]=1;
03438 }
03439 }
03440 else
03441 {
03442 word borrow = Decrement(reg, reg.size());
03443 assert(!borrow);
03444 if (WordCount()==0)
03445 *this = Zero();
03446 }
03447 return *this;
03448 }
03449
03450 Integer& Integer::operator--()
03451 {
03452 if (IsNegative())
03453 {
03454 if (Increment(reg, reg.size()))
03455 {
03456 reg.CleanGrow(2*reg.size());
03457 reg[reg.size()/2]=1;
03458 }
03459 }
03460 else
03461 {
03462 if (Decrement(reg, reg.size()))
03463 *this = -One();
03464 }
03465 return *this;
03466 }
03467
03468 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03469 {
03470 int carry;
03471 if (a.reg.size() == b.reg.size())
03472 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03473 else if (a.reg.size() > b.reg.size())
03474 {
03475 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03476 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03477 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03478 }
03479 else
03480 {
03481 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03482 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03483 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03484 }
03485
03486 if (carry)
03487 {
03488 sum.reg.CleanGrow(2*sum.reg.size());
03489 sum.reg[sum.reg.size()/2] = 1;
03490 }
03491 sum.sign = Integer::POSITIVE;
03492 }
03493
03494 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03495 {
03496 unsigned aSize = a.WordCount();
03497 aSize += aSize%2;
03498 unsigned bSize = b.WordCount();
03499 bSize += bSize%2;
03500
03501 if (aSize == bSize)
03502 {
03503 if (Compare(a.reg, b.reg, aSize) >= 0)
03504 {
03505 Subtract(diff.reg, a.reg, b.reg, aSize);
03506 diff.sign = Integer::POSITIVE;
03507 }
03508 else
03509 {
03510 Subtract(diff.reg, b.reg, a.reg, aSize);
03511 diff.sign = Integer::NEGATIVE;
03512 }
03513 }
03514 else if (aSize > bSize)
03515 {
03516 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03517 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03518 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03519 assert(!borrow);
03520 diff.sign = Integer::POSITIVE;
03521 }
03522 else
03523 {
03524 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03525 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03526 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03527 assert(!borrow);
03528 diff.sign = Integer::NEGATIVE;
03529 }
03530 }
03531
03532
03533 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03534 {
03535 return a < b ? b : a;
03536 }
03537
03538 Integer Integer::Plus(const Integer& b) const
03539 {
03540 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03541 if (NotNegative())
03542 {
03543 if (b.NotNegative())
03544 PositiveAdd(sum, *this, b);
03545 else
03546 PositiveSubtract(sum, *this, b);
03547 }
03548 else
03549 {
03550 if (b.NotNegative())
03551 PositiveSubtract(sum, b, *this);
03552 else
03553 {
03554 PositiveAdd(sum, *this, b);
03555 sum.sign = Integer::NEGATIVE;
03556 }
03557 }
03558 return sum;
03559 }
03560
03561 Integer& Integer::operator+=(const Integer& t)
03562 {
03563 reg.CleanGrow(t.reg.size());
03564 if (NotNegative())
03565 {
03566 if (t.NotNegative())
03567 PositiveAdd(*this, *this, t);
03568 else
03569 PositiveSubtract(*this, *this, t);
03570 }
03571 else
03572 {
03573 if (t.NotNegative())
03574 PositiveSubtract(*this, t, *this);
03575 else
03576 {
03577 PositiveAdd(*this, *this, t);
03578 sign = Integer::NEGATIVE;
03579 }
03580 }
03581 return *this;
03582 }
03583
03584 Integer Integer::Minus(const Integer& b) const
03585 {
03586 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03587 if (NotNegative())
03588 {
03589 if (b.NotNegative())
03590 PositiveSubtract(diff, *this, b);
03591 else
03592 PositiveAdd(diff, *this, b);
03593 }
03594 else
03595 {
03596 if (b.NotNegative())
03597 {
03598 PositiveAdd(diff, *this, b);
03599 diff.sign = Integer::NEGATIVE;
03600 }
03601 else
03602 PositiveSubtract(diff, b, *this);
03603 }
03604 return diff;
03605 }
03606
03607 Integer& Integer::operator-=(const Integer& t)
03608 {
03609 reg.CleanGrow(t.reg.size());
03610 if (NotNegative())
03611 {
03612 if (t.NotNegative())
03613 PositiveSubtract(*this, *this, t);
03614 else
03615 PositiveAdd(*this, *this, t);
03616 }
03617 else
03618 {
03619 if (t.NotNegative())
03620 {
03621 PositiveAdd(*this, *this, t);
03622 sign = Integer::NEGATIVE;
03623 }
03624 else
03625 PositiveSubtract(*this, t, *this);
03626 }
03627 return *this;
03628 }
03629
03630 Integer& Integer::operator<<=(size_t n)
03631 {
03632 const size_t wordCount = WordCount();
03633 const size_t shiftWords = n / WORD_BITS;
03634 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03635
03636 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03637 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03638 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03639 return *this;
03640 }
03641
03642 Integer& Integer::operator>>=(size_t n)
03643 {
03644 const size_t wordCount = WordCount();
03645 const size_t shiftWords = n / WORD_BITS;
03646 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03647
03648 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03649 if (wordCount > shiftWords)
03650 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03651 if (IsNegative() && WordCount()==0)
03652 *this = Zero();
03653 return *this;
03654 }
03655
03656 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03657 {
03658 size_t aSize = RoundupSize(a.WordCount());
03659 size_t bSize = RoundupSize(b.WordCount());
03660
03661 product.reg.CleanNew(RoundupSize(aSize+bSize));
03662 product.sign = Integer::POSITIVE;
03663
03664 IntegerSecBlock workspace(aSize + bSize);
03665 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03666 }
03667
03668 void Multiply(Integer &product, const Integer &a, const Integer &b)
03669 {
03670 PositiveMultiply(product, a, b);
03671
03672 if (a.NotNegative() != b.NotNegative())
03673 product.Negate();
03674 }
03675
03676 Integer Integer::Times(const Integer &b) const
03677 {
03678 Integer product;
03679 Multiply(product, *this, b);
03680 return product;
03681 }
03682
03683
03684
03685
03686
03687
03688
03689
03690
03691
03692
03693
03694
03695
03696
03697
03698
03699
03700
03701
03702
03703
03704
03705 void PositiveDivide(Integer &remainder, Integer "ient,
03706 const Integer &a, const Integer &b)
03707 {
03708 unsigned aSize = a.WordCount();
03709 unsigned bSize = b.WordCount();
03710
03711 if (!bSize)
03712 throw Integer::DivideByZero();
03713
03714 if (aSize < bSize)
03715 {
03716 remainder = a;
03717 remainder.sign = Integer::POSITIVE;
03718 quotient = Integer::Zero();
03719 return;
03720 }
03721
03722 aSize += aSize%2;
03723 bSize += bSize%2;
03724
03725 remainder.reg.CleanNew(RoundupSize(bSize));
03726 remainder.sign = Integer::POSITIVE;
03727 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03728 quotient.sign = Integer::POSITIVE;
03729
03730 IntegerSecBlock T(aSize+3*(bSize+2));
03731 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03732 }
03733
03734 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03735 {
03736 PositiveDivide(remainder, quotient, dividend, divisor);
03737
03738 if (dividend.IsNegative())
03739 {
03740 quotient.Negate();
03741 if (remainder.NotZero())
03742 {
03743 --quotient;
03744 remainder = divisor.AbsoluteValue() - remainder;
03745 }
03746 }
03747
03748 if (divisor.IsNegative())
03749 quotient.Negate();
03750 }
03751
03752 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03753 {
03754 q = a;
03755 q >>= n;
03756
03757 const size_t wordCount = BitsToWords(n);
03758 if (wordCount <= a.WordCount())
03759 {
03760 r.reg.resize(RoundupSize(wordCount));
03761 CopyWords(r.reg, a.reg, wordCount);
03762 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03763 if (n % WORD_BITS != 0)
03764 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03765 }
03766 else
03767 {
03768 r.reg.resize(RoundupSize(a.WordCount()));
03769 CopyWords(r.reg, a.reg, r.reg.size());
03770 }
03771 r.sign = POSITIVE;
03772
03773 if (a.IsNegative() && r.NotZero())
03774 {
03775 --q;
03776 r = Power2(n) - r;
03777 }
03778 }
03779
03780 Integer Integer::DividedBy(const Integer &b) const
03781 {
03782 Integer remainder, quotient;
03783 Integer::Divide(remainder, quotient, *this, b);
03784 return quotient;
03785 }
03786
03787 Integer Integer::Modulo(const Integer &b) const
03788 {
03789 Integer remainder, quotient;
03790 Integer::Divide(remainder, quotient, *this, b);
03791 return remainder;
03792 }
03793
03794 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03795 {
03796 if (!divisor)
03797 throw Integer::DivideByZero();
03798
03799 assert(divisor);
03800
03801 if ((divisor & (divisor-1)) == 0)
03802 {
03803 quotient = dividend >> (BitPrecision(divisor)-1);
03804 remainder = dividend.reg[0] & (divisor-1);
03805 return;
03806 }
03807
03808 unsigned int i = dividend.WordCount();
03809 quotient.reg.CleanNew(RoundupSize(i));
03810 remainder = 0;
03811 while (i--)
03812 {
03813 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03814 remainder = DWord(dividend.reg[i], remainder) % divisor;
03815 }
03816
03817 if (dividend.NotNegative())
03818 quotient.sign = POSITIVE;
03819 else
03820 {
03821 quotient.sign = NEGATIVE;
03822 if (remainder)
03823 {
03824 --quotient;
03825 remainder = divisor - remainder;
03826 }
03827 }
03828 }
03829
03830 Integer Integer::DividedBy(word b) const
03831 {
03832 word remainder;
03833 Integer quotient;
03834 Integer::Divide(remainder, quotient, *this, b);
03835 return quotient;
03836 }
03837
03838 word Integer::Modulo(word divisor) const
03839 {
03840 if (!divisor)
03841 throw Integer::DivideByZero();
03842
03843 assert(divisor);
03844
03845 word remainder;
03846
03847 if ((divisor & (divisor-1)) == 0)
03848 remainder = reg[0] & (divisor-1);
03849 else
03850 {
03851 unsigned int i = WordCount();
03852
03853 if (divisor <= 5)
03854 {
03855 DWord sum(0, 0);
03856 while (i--)
03857 sum += reg[i];
03858 remainder = sum % divisor;
03859 }
03860 else
03861 {
03862 remainder = 0;
03863 while (i--)
03864 remainder = DWord(reg[i], remainder) % divisor;
03865 }
03866 }
03867
03868 if (IsNegative() && remainder)
03869 remainder = divisor - remainder;
03870
03871 return remainder;
03872 }
03873
03874 void Integer::Negate()
03875 {
03876 if (!!(*this))
03877 sign = Sign(1-sign);
03878 }
03879
03880 int Integer::PositiveCompare(const Integer& t) const
03881 {
03882 unsigned size = WordCount(), tSize = t.WordCount();
03883
03884 if (size == tSize)
03885 return CryptoPP::Compare(reg, t.reg, size);
03886 else
03887 return size > tSize ? 1 : -1;
03888 }
03889
03890 int Integer::Compare(const Integer& t) const
03891 {
03892 if (NotNegative())
03893 {
03894 if (t.NotNegative())
03895 return PositiveCompare(t);
03896 else
03897 return 1;
03898 }
03899 else
03900 {
03901 if (t.NotNegative())
03902 return -1;
03903 else
03904 return -PositiveCompare(t);
03905 }
03906 }
03907
03908 Integer Integer::SquareRoot() const
03909 {
03910 if (!IsPositive())
03911 return Zero();
03912
03913
03914 Integer x, y = Power2((BitCount()+1)/2);
03915 assert(y*y >= *this);
03916
03917 do
03918 {
03919 x = y;
03920 y = (x + *this/x) >> 1;
03921 } while (y<x);
03922
03923 return x;
03924 }
03925
03926 bool Integer::IsSquare() const
03927 {
03928 Integer r = SquareRoot();
03929 return *this == r.Squared();
03930 }
03931
03932 bool Integer::IsUnit() const
03933 {
03934 return (WordCount() == 1) && (reg[0] == 1);
03935 }
03936
03937 Integer Integer::MultiplicativeInverse() const
03938 {
03939 return IsUnit() ? *this : Zero();
03940 }
03941
03942 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03943 {
03944 return x*y%m;
03945 }
03946
03947 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03948 {
03949 ModularArithmetic mr(m);
03950 return mr.Exponentiate(x, e);
03951 }
03952
03953 Integer Integer::Gcd(const Integer &a, const Integer &b)
03954 {
03955 return EuclideanDomainOf<Integer>().Gcd(a, b);
03956 }
03957
03958 Integer Integer::InverseMod(const Integer &m) const
03959 {
03960 assert(m.NotNegative());
03961
03962 if (IsNegative())
03963 return Modulo(m).InverseMod(m);
03964
03965 if (m.IsEven())
03966 {
03967 if (!m || IsEven())
03968 return Zero();
03969 if (*this == One())
03970 return One();
03971
03972 Integer u = m.Modulo(*this).InverseMod(*this);
03973 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03974 }
03975
03976 SecBlock<word> T(m.reg.size() * 4);
03977 Integer r((word)0, m.reg.size());
03978 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03979 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03980 return r;
03981 }
03982
03983 word Integer::InverseMod(word mod) const
03984 {
03985 word g0 = mod, g1 = *this % mod;
03986 word v0 = 0, v1 = 1;
03987 word y;
03988
03989 while (g1)
03990 {
03991 if (g1 == 1)
03992 return v1;
03993 y = g0 / g1;
03994 g0 = g0 % g1;
03995 v0 += y * v1;
03996
03997 if (!g0)
03998 break;
03999 if (g0 == 1)
04000 return mod-v0;
04001 y = g1 / g0;
04002 g1 = g1 % g0;
04003 v1 += y * v0;
04004 }
04005 return 0;
04006 }
04007
04008
04009
04010 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04011 {
04012 BERSequenceDecoder seq(bt);
04013 OID oid(seq);
04014 if (oid != ASN1::prime_field())
04015 BERDecodeError();
04016 m_modulus.BERDecode(seq);
04017 seq.MessageEnd();
04018 m_result.reg.resize(m_modulus.reg.size());
04019 }
04020
04021 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04022 {
04023 DERSequenceEncoder seq(bt);
04024 ASN1::prime_field().DEREncode(seq);
04025 m_modulus.DEREncode(seq);
04026 seq.MessageEnd();
04027 }
04028
04029 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04030 {
04031 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04032 }
04033
04034 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04035 {
04036 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04037 }
04038
04039 const Integer& ModularArithmetic::Half(const Integer &a) const
04040 {
04041 if (a.reg.size()==m_modulus.reg.size())
04042 {
04043 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04044 return m_result;
04045 }
04046 else
04047 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04048 }
04049
04050 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04051 {
04052 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04053 {
04054 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04055 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04056 {
04057 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04058 }
04059 return m_result;
04060 }
04061 else
04062 {
04063 m_result1 = a+b;
04064 if (m_result1 >= m_modulus)
04065 m_result1 -= m_modulus;
04066 return m_result1;
04067 }
04068 }
04069
04070 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04071 {
04072 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04073 {
04074 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04075 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04076 {
04077 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04078 }
04079 }
04080 else
04081 {
04082 a+=b;
04083 if (a>=m_modulus)
04084 a-=m_modulus;
04085 }
04086
04087 return a;
04088 }
04089
04090 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04091 {
04092 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04093 {
04094 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04095 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04096 return m_result;
04097 }
04098 else
04099 {
04100 m_result1 = a-b;
04101 if (m_result1.IsNegative())
04102 m_result1 += m_modulus;
04103 return m_result1;
04104 }
04105 }
04106
04107 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04108 {
04109 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04110 {
04111 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04112 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04113 }
04114 else
04115 {
04116 a-=b;
04117 if (a.IsNegative())
04118 a+=m_modulus;
04119 }
04120
04121 return a;
04122 }
04123
04124 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04125 {
04126 if (!a)
04127 return a;
04128
04129 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04130 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04131 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04132
04133 return m_result;
04134 }
04135
04136 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04137 {
04138 if (m_modulus.IsOdd())
04139 {
04140 MontgomeryRepresentation dr(m_modulus);
04141 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04142 }
04143 else
04144 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04145 }
04146
04147 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04148 {
04149 if (m_modulus.IsOdd())
04150 {
04151 MontgomeryRepresentation dr(m_modulus);
04152 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04153 for (unsigned int i=0; i<exponentsCount; i++)
04154 results[i] = dr.ConvertOut(results[i]);
04155 }
04156 else
04157 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04158 }
04159
04160 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04161 : ModularArithmetic(m),
04162 m_u((word)0, m_modulus.reg.size()),
04163 m_workspace(5*m_modulus.reg.size())
04164 {
04165 if (!m_modulus.IsOdd())
04166 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04167
04168 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04169 }
04170
04171 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04172 {
04173 word *const T = m_workspace.begin();
04174 word *const R = m_result.reg.begin();
04175 const size_t N = m_modulus.reg.size();
04176 assert(a.reg.size()<=N && b.reg.size()<=N);
04177
04178 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04179 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04180 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04181 return m_result;
04182 }
04183
04184 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04185 {
04186 word *const T = m_workspace.begin();
04187 word *const R = m_result.reg.begin();
04188 const size_t N = m_modulus.reg.size();
04189 assert(a.reg.size()<=N);
04190
04191 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04192 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04193 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04194 return m_result;
04195 }
04196
04197 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04198 {
04199 word *const T = m_workspace.begin();
04200 word *const R = m_result.reg.begin();
04201 const size_t N = m_modulus.reg.size();
04202 assert(a.reg.size()<=N);
04203
04204 CopyWords(T, a.reg, a.reg.size());
04205 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04206 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04207 return m_result;
04208 }
04209
04210 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04211 {
04212
04213 word *const T = m_workspace.begin();
04214 word *const R = m_result.reg.begin();
04215 const size_t N = m_modulus.reg.size();
04216 assert(a.reg.size()<=N);
04217
04218 CopyWords(T, a.reg, a.reg.size());
04219 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04220 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04221 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04222
04223
04224
04225 if (k>N*WORD_BITS)
04226 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04227 else
04228 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04229
04230 return m_result;
04231 }
04232
04233 NAMESPACE_END
04234
04235 #endif