00001
00002
00003
00004
00005 #include "pch.h"
00006
00007 #ifndef CRYPTOPP_GENERATE_X64_MASM
00008
00009 #include "panama.h"
00010 #include "misc.h"
00011 #include "cpu.h"
00012
00013 NAMESPACE_BEGIN(CryptoPP)
00014
00015 template <class B>
00016 void Panama<B>::Reset()
00017 {
00018 memset(m_state, 0, m_state.SizeInBytes());
00019 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00020 m_state[17] = HasSSSE3();
00021 #endif
00022 }
00023
00024 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
00025
// Panama_SSE2_Pull: SSE2 (optionally SSSE3) implementation of the Panama
// iteration without message input.
//
//   count - number of iterations to run; zero returns immediately.
//   state - Panama state words. Words 0..15 are accessed with movdqa, which
//           requires 16-byte alignment; word 16 is kept in eax; word 17 is the
//           byte offset into the circular buffer, which begins at word 20.
//   z     - destination for 32 bytes of output per iteration, or null for none.
//           Aligned and unaligned destinations are both handled.
//   y     - optional 32 bytes per iteration XORed into the output; may be null.
//           Only examined when z is non-null.
//
// The same text is built three ways: as an external MASM procedure
// (CRYPTOPP_X64_MASM_AVAILABLE / CRYPTOPP_GENERATE_X64_MASM), as GNU-style
// inline assembly, or as inline assembly with the arguments loaded by mov.
// AS_REG_n, AS1/AS2/AS3/ASS/ASJ/ASL and ASM_MOD are defined outside this file.
00026 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00027 extern "C" {
00028 void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y);
00029 }
00030 #elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00031
// MASM generation: emit a procedure prologue that saves rdi, xmm6 and xmm7.
00032 #ifdef CRYPTOPP_GENERATE_X64_MASM
00033 Panama_SSE2_Pull PROC FRAME
00034 rex_push_reg rdi
00035 alloc_stack(2*16)
00036 save_xmm128 xmm6, 0h
00037 save_xmm128 xmm7, 10h
00038 .endprolog
00039 #else
00040 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
00041 void CRYPTOPP_NOINLINE Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
00042 {
00043 #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
00044 asm __volatile__
00045 (
00046 ".intel_syntax noprefix;"
00047 AS_PUSH_IF86( bx)
00048 #else
// Non-GNU inline assembly: move the four arguments into the working registers.
00049 AS2( mov AS_REG_1, count)
00050 AS2( mov AS_REG_2, state)
00051 AS2( mov AS_REG_3, z)
00052 AS2( mov AS_REG_4, y)
00053 #endif
00054 #endif
00055
// REG_loopEnd is where the terminating buffer offset lives: the top-of-stack
// slot on x86, a spare register on x64.
00056 #if CRYPTOPP_BOOL_X86
00057 #define REG_loopEnd [esp]
00058 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
00059 #define REG_loopEnd rdi
00060 #else
00061 #define REG_loopEnd r8
00062 #endif
00063
// Scale count by 32 (the offset advances 32 bytes per iteration) and leave via
// label 5 when count is zero.
00064 AS2( shl AS_REG_1, 5)
00065 ASJ( jz, 5, f)
// AS_REG_6 = current buffer offset (state word 17); AS_REG_1 = offset at which to stop.
00066 AS2( mov AS_REG_6d, [AS_REG_2+4*17])
00067 AS2( add AS_REG_1, AS_REG_6)
00068
00069 #if CRYPTOPP_BOOL_X64
00070 AS2( mov REG_loopEnd, AS_REG_1)
00071 #else
// x86: preserve ebp, then push the end offset so that it sits at [esp].
00072 AS1( push ebp)
00073 AS1( push AS_REG_1)
00074 #endif
00075
// Load state words 0..15 into xmm0..xmm3 and word 16 into eax.
00076 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
00077 AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
00078 AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
00079 AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
00080 AS2( mov eax, dword ptr [AS_REG_2+4*16])
00081
// Label 4: top of the per-iteration loop.
00082 ASL(4)
00083
// Build xmm5 and xmm6 as one-dword-shifted combinations of (xmm2,xmm3) and
// (xmm3,eax). Bit 0 of the offset register selects the palignr variant
// (label 6); Reset() stores HasSSSE3() into word 17, which is where that bit
// comes from.
00084 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00085 AS2( test AS_REG_6, 1)
00086 ASJ( jnz, 6, f)
00087 #endif
00088 AS2( movdqa xmm6, xmm2)
00089 AS2( movss xmm6, xmm3)
00090 ASS( pshufd xmm5, xmm6, 0, 3, 2, 1)
00091 AS2( movd xmm6, eax)
00092 AS2( movdqa xmm7, xmm3)
00093 AS2( movss xmm7, xmm6)
00094 ASS( pshufd xmm6, xmm7, 0, 3, 2, 1)
00095 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00096 ASJ( jmp, 7, f)
00097 ASL(6)
00098 AS2( movdqa xmm5, xmm3)
00099 AS3( palignr xmm5, xmm2, 4)
00100 AS2( movd xmm6, eax)
00101 AS3( palignr xmm6, xmm3, 4)
00102 ASL(7)
00103 #endif
00104
// Scalar lane: eax ^= (~low dword of xmm2) | (low dword of xmm3).
00105 AS2( movd AS_REG_1d, xmm2)
00106 AS1( not AS_REG_1d)
00107 AS2( movd AS_REG_7d, xmm3)
00108 AS2( or AS_REG_1d, AS_REG_7d)
00109 AS2( xor eax, AS_REG_1d)
00110
// SSE2_Index(i) is the same (i*13+16) mod 17 storage permutation that the
// a(i)/c(i) macros in Panama<B>::Iterate use.
00111 #define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
00112
// pi(i): take the low dword of xmm7, rotate it left by k*(k+1)/2 mod 32 with
// k = 5*i mod 17, and store it to state word SSE2_Index(k).
00113 #define pi(i) \
00114 AS2( movd AS_REG_1d, xmm7)\
00115 AS2( rol AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
00116 AS2( mov [AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)
00117
// pi4(x, y, z, a, b, c, d): xmm7 = ((~x) | y) ^ z, then apply pi() to each of
// the four dwords of xmm7 in turn (for a, b, c, d), bringing the next dword
// into the low position with pshuflw / punpckhqdq between stores.
00118 #define pi4(x, y, z, a, b, c, d) \
00119 AS2( pcmpeqb xmm7, xmm7)\
00120 AS2( pxor xmm7, x)\
00121 AS2( por xmm7, y)\
00122 AS2( pxor xmm7, z)\
00123 pi(a)\
00124 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
00125 pi(b)\
00126 AS2( punpckhqdq xmm7, xmm7)\
00127 pi(c)\
00128 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
00129 pi(d)
00130
00131 pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
00132 pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
00133 pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
00134 pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
00135
00136
// Interleave the pre-update register contents; the results feed both the
// output words and the buffer update below.
00137 AS2( movdqa xmm4, xmm3)
00138 AS2( punpcklqdq xmm3, xmm2)
00139 AS2( punpckhdq xmm4, xmm2)
00140 AS2( movdqa xmm2, xmm1)
00141 AS2( punpcklqdq xmm1, xmm0)
00142 AS2( punpckhdq xmm2, xmm0)
00143
00144
// Output: skipped entirely (label 0) when z is null. Otherwise the 32 output
// bytes are assembled in xmm4/xmm6, XORed with y when y is non-null (aligned
// load, or movdqu at label 2 when y is not 16-byte aligned), and stored to z
// (movdqa, or movdqu at label 3 when z is not 16-byte aligned). y and z each
// advance by 32 bytes when used.
00145 AS2( test AS_REG_3, AS_REG_3)
00146 ASJ( jz, 0, f)
00147 AS2( movdqa xmm6, xmm4)
00148 AS2( punpcklqdq xmm4, xmm2)
00149 AS2( punpckhqdq xmm6, xmm2)
00150 AS2( test AS_REG_4, 15)
00151 ASJ( jnz, 2, f)
00152 AS2( test AS_REG_4, AS_REG_4)
00153 ASJ( jz, 1, f)
00154 AS2( pxor xmm4, [AS_REG_4])
00155 AS2( pxor xmm6, [AS_REG_4+16])
00156 AS2( add AS_REG_4, 32)
00157 ASJ( jmp, 1, f)
00158 ASL(2)
00159 AS2( movdqu xmm0, [AS_REG_4])
00160 AS2( movdqu xmm2, [AS_REG_4+16])
00161 AS2( pxor xmm4, xmm0)
00162 AS2( pxor xmm6, xmm2)
00163 AS2( add AS_REG_4, 32)
00164 ASL(1)
00165 AS2( test AS_REG_3, 15)
00166 ASJ( jnz, 3, f)
00167 AS2( movdqa XMMWORD_PTR [AS_REG_3], xmm4)
00168 AS2( movdqa XMMWORD_PTR [AS_REG_3+16], xmm6)
00169 AS2( add AS_REG_3, 32)
00170 ASJ( jmp, 0, f)
00171 ASL(3)
00172 AS2( movdqu XMMWORD_PTR [AS_REG_3], xmm4)
00173 AS2( movdqu XMMWORD_PTR [AS_REG_3+16], xmm6)
00174 AS2( add AS_REG_3, 32)
00175 ASL(0)
00176
00177
// Buffer update. The buffer starts at state+20*4 and is addressed circularly:
// offsets are masked with 31*32. AS_REG_1 = offset+32 and
// AS_REG_7 = offset+(32-24)*32, the same two stages Iterate() names b0 and b25.
00178 AS2( lea AS_REG_1, [AS_REG_6 + 32])
00179 AS2( and AS_REG_1, 31*32)
00180 AS2( lea AS_REG_7, [AS_REG_6 + (32-24)*32])
00181 AS2( and AS_REG_7, 31*32)
00182
00183 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8])
00184 AS2( pxor xmm3, xmm0)
00185 ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
00186 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3)
00187 AS2( pxor xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8])
00188 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0)
00189
00190 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8])
00191 AS2( pxor xmm1, xmm4)
00192 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1)
00193 AS2( pxor xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8])
00194 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4)
00195
00196
// Reload words 0..15, which now hold the values written by the pi() stores.
00197 AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
00198 AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
00199 AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
00200 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
00201
// Form one-dword-shifted copies in xmm4..xmm7, again with an SSE2 variant and
// a palignr variant (label 8) selected by bit 0 of the offset register.
00202 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00203 AS2( test AS_REG_6, 1)
00204 ASJ( jnz, 8, f)
00205 #endif
00206 AS2( movd xmm6, eax)
00207 AS2( movdqa xmm7, xmm3)
00208 AS2( movss xmm7, xmm6)
00209 AS2( movdqa xmm6, xmm2)
00210 AS2( movss xmm6, xmm3)
00211 AS2( movdqa xmm5, xmm1)
00212 AS2( movss xmm5, xmm2)
00213 AS2( movdqa xmm4, xmm0)
00214 AS2( movss xmm4, xmm1)
00215 ASS( pshufd xmm7, xmm7, 0, 3, 2, 1)
00216 ASS( pshufd xmm6, xmm6, 0, 3, 2, 1)
00217 ASS( pshufd xmm5, xmm5, 0, 3, 2, 1)
00218 ASS( pshufd xmm4, xmm4, 0, 3, 2, 1)
00219 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00220 ASJ( jmp, 9, f)
00221 ASL(8)
00222 AS2( movd xmm7, eax)
00223 AS3( palignr xmm7, xmm3, 4)
00224 AS2( movq xmm6, xmm3)
00225 AS3( palignr xmm6, xmm2, 4)
00226 AS2( movq xmm5, xmm2)
00227 AS3( palignr xmm5, xmm1, 4)
00228 AS2( movq xmm4, xmm1)
00229 AS3( palignr xmm4, xmm0, 4)
00230 ASL(9)
00231 #endif
00232
// Scalar lane: eax ^= 1 ^ (low dword of xmm0) ^ (low dword of xmm3).
00233 AS2( xor eax, 1)
00234 AS2( movd AS_REG_1d, xmm0)
00235 AS2( xor eax, AS_REG_1d)
00236 AS2( movd AS_REG_1d, xmm3)
00237 AS2( xor eax, AS_REG_1d)
00238
// Vector lanes: XOR each register with its neighbour and with the shifted
// copies prepared above.
00239 AS2( pxor xmm3, xmm2)
00240 AS2( pxor xmm2, xmm1)
00241 AS2( pxor xmm1, xmm0)
00242 AS2( pxor xmm0, xmm7)
00243 AS2( pxor xmm3, xmm7)
00244 AS2( pxor xmm2, xmm6)
00245 AS2( pxor xmm1, xmm5)
00246 AS2( pxor xmm0, xmm4)
00247
00248
// Fold in two buffer stages, at offset+(32-4)*32 and offset+16*32 (masked with
// 31*32) -- the stages Iterate() names b4 and b16. Their qwords are interleaved
// with punpcklqdq/punpckhqdq before being XORed into the state registers.
00249 AS2( lea AS_REG_1, [AS_REG_6 + (32-4)*32])
00250 AS2( and AS_REG_1, 31*32)
00251 AS2( lea AS_REG_7, [AS_REG_6 + 16*32])
00252 AS2( and AS_REG_7, 31*32)
00253
00254 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16])
00255 AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16])
00256 AS2( movdqa xmm6, xmm4)
00257 AS2( punpcklqdq xmm4, xmm5)
00258 AS2( punpckhqdq xmm6, xmm5)
00259 AS2( pxor xmm3, xmm4)
00260 AS2( pxor xmm2, xmm6)
00261
00262 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16])
00263 AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16])
00264 AS2( movdqa xmm6, xmm4)
00265 AS2( punpcklqdq xmm4, xmm5)
00266 AS2( punpckhqdq xmm6, xmm5)
00267 AS2( pxor xmm1, xmm4)
00268 AS2( pxor xmm0, xmm6)
00269
00270
// Advance the buffer offset by one stage and loop until it reaches the end value.
00271 AS2( add AS_REG_6, 32)
00272 AS2( cmp AS_REG_6, REG_loopEnd)
00273 ASJ( jne, 4, b)
00274
00275
// Write words 0..16 back to the state.
// NOTE(review): no store of AS_REG_6 back to state word 17 is visible in this
// routine -- confirm how the advanced buffer offset is meant to be persisted
// between calls.
00276 AS2( mov [AS_REG_2+4*16], eax)
00277 AS2( movdqa XMMWORD_PTR [AS_REG_2+3*16], xmm3)
00278 AS2( movdqa XMMWORD_PTR [AS_REG_2+2*16], xmm2)
00279 AS2( movdqa XMMWORD_PTR [AS_REG_2+1*16], xmm1)
00280 AS2( movdqa XMMWORD_PTR [AS_REG_2+0*16], xmm0)
00281
// x86: drop the pushed end offset and restore ebp.
00282 #if CRYPTOPP_BOOL_X86
00283 AS2( add esp, 4)
00284 AS1( pop ebp)
00285 #endif
// Label 5: common exit, also the target of the count == 0 early-out.
00286 ASL(5)
00287
// GNU inline assembly epilogue: no output operands; the four arguments are
// bound to fixed registers through the input constraints below, followed by
// the clobber list.
00288 #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
00289 AS_POP_IF86( bx)
00290 ".att_syntax prefix;"
00291 :
00292 #if CRYPTOPP_BOOL_X64
00293 : "D" (count), "S" (state), "d" (z), "c" (y)
00294 : "%r8", "%r9", "r10", "%eax", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
00295 #else
00296 : "c" (count), "d" (state), "S" (z), "D" (y)
00297 : "%eax", "memory", "cc"
00298 #endif
00299 );
00300 #endif
// MASM generation: restore xmm6/xmm7 and rdi saved by the prologue, then return.
00301 #ifdef CRYPTOPP_GENERATE_X64_MASM
00302 movdqa xmm6, [rsp + 0h]
00303 movdqa xmm7, [rsp + 10h]
00304 add rsp, 2*16
00305 pop rdi
00306 ret
00307 Panama_SSE2_Pull ENDP
00308 #else
00309 }
00310 #endif
00311 #endif // #ifdef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00312
00313 #ifndef CRYPTOPP_GENERATE_X64_MASM
00314
// Portable C++ implementation of the Panama iteration, run `count` times on
// m_state.
//
//   count - number of iterations; each one handles 8 words of p / z / y.
//   p     - message input. When non-null, 8 words per iteration are byte-order
//           corrected and absorbed (US / TS1S). When null, the state itself is
//           fed back into the buffer (UL) and buffer stage b4 is used instead
//           (TS1L).
//   z     - when non-null, receives 8 output words per iteration taken from
//           a(9)..a(16) before the state is updated.
//   y     - when z and y are both non-null, y[i] is XORed into z[i].
//
// Callers in this file omit trailing arguments (e.g. Iterate(32)), so defaults
// are declared elsewhere -- presumably null pointers; confirm in the header.
00315 template <class B>
00316 void Panama<B>::Iterate(size_t count, const word32 *p, word32 *z, const word32 *y)
00317 {
// bstart: byte offset into the circular buffer, kept in state word 17.
00318 word32 bstart = m_state[17];
00319 word32 *const aPtr = m_state;
// cPtr: scratch for the 17 intermediate words produced by GP().
00320 word32 cPtr[17];
00321
// The buffer begins at word 20 of the state and is addressed in bytes.
00322 #define bPtr ((byte *)(aPtr+20))
00323
00324
00325
00326
// a(i) / c(i): logical word i lives at physical index (13*i+16) mod 17, i.e.
// the storage order is 4 8 12 16 | 3 7 11 15 | 2 6 10 14 | 1 5 9 13 | 0.
// This matches how Panama_SSE2_Pull loads words 0..15 into xmm0..xmm3 and
// word 16 into eax.
00327 #define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17
00328 #define c(i) cPtr[((i)*13+16) % 17]
00329
// b(i, j): word j of buffer stage b<i>; words are stored in the order
// 0 4 | 1 5 | 2 6 | 3 7.
00330 #define b(i, j) b##i[(j)*2%8 + (j)/4]
00331
00332
// OA / OX: write output word i from a(i+9), byte-order corrected; OX also
// XORs in y[i].
00333 #define OA(i) z[i] = ConditionalByteReverse(B::ToEnum(), a(i+9))
00334 #define OX(i) z[i] = y[i] ^ ConditionalByteReverse(B::ToEnum(), a(i+9))
00335
// US / UL: buffer update. The old b0 word t is XORed into stage b25 at word
// (i+6)%8, and b0 word i becomes t XOR the input word (US) or t XOR a(i+1) (UL).
00336 #define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
00337 #define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
00338
// GP(i): with k = 5*i mod 17, c(k) = rotl(a(i) ^ (a(i+1) | ~a(i+2)), k*(k+1)/2 mod 32).
00339 #define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
00340
// T(i, x): a(i) = c(i) ^ c(i+1) ^ c(i+4) ^ x. The TS1S / TS1L / TS2 variants
// supply x from the input words, buffer stage b4, or buffer stage b16.
00341 #define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
00342 #define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
00343 #define TS1L(i) T(i+1, b(4,i))
00344 #define TS2(i) T(i+9, b(16,i))
00345
00346 while (count--)
00347 {
// Emit output first, from the state as it stands before this iteration.
00348 if (z)
00349 {
00350 if (y)
00351 {
00352 OX(0); OX(1); OX(2); OX(3); OX(4); OX(5); OX(6); OX(7);
00353 y += 8;
00354 }
00355 else
00356 {
00357 OA(0); OA(1); OA(2); OA(3); OA(4); OA(5); OA(6); OA(7);
00358 }
00359 z += 8;
00360 }
00361
// Locate the buffer stages. Offsets wrap via the 31*32 mask (32 stages of
// 32 bytes). b16 and b4 are taken relative to the old bstart; b0 and b25
// relative to the advanced one.
00362 word32 *const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32));
00363 word32 *const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32));
00364 bstart += 32;
00365 word32 *const b0 = (word32 *)(bPtr+((bstart) & 31*32));
00366 word32 *const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32));
00367
00368 if (p)
00369 {
00370 US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
00371 }
00372 else
00373 {
00374 UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
00375 }
00376
// Compute all 17 c() words from the current a() words.
00377 GP(0);
00378 GP(1);
00379 GP(2);
00380 GP(3);
00381 GP(4);
00382 GP(5);
00383 GP(6);
00384 GP(7);
00385 GP(8);
00386 GP(9);
00387 GP(10);
00388 GP(11);
00389 GP(12);
00390 GP(13);
00391 GP(14);
00392 GP(15);
00393 GP(16);
00394
// Rebuild the a() words from c(): word 0 mixes in the constant 1, words 1..8
// the input (or stage b4), words 9..16 stage b16.
00395 T(0,1);
00396
00397 if (p)
00398 {
00399 TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
00400 p += 8;
00401 }
00402 else
00403 {
00404 TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
00405 }
00406
00407 TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
00408 }
// Persist the advanced buffer offset for the next call.
00409 m_state[17] = bstart;
00410 }
00411
00412 namespace Weak {
00413 template <class B>
00414 size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length)
00415 {
00416 this->Iterate(length / this->BLOCKSIZE, input);
00417 return length % this->BLOCKSIZE;
00418 }
00419
00420 template <class B>
00421 void PanamaHash<B>::TruncatedFinal(byte *hash, size_t size)
00422 {
00423 this->ThrowIfInvalidTruncatedSize(size);
00424
00425 PadLastBlock(this->BLOCKSIZE, 0x01);
00426
00427 HashEndianCorrectedBlock(this->m_data);
00428
00429 this->Iterate(32);
00430
00431 FixedSizeSecBlock<word32, 8> buf;
00432 this->Iterate(1, NULL, buf, NULL);
00433
00434 memcpy(hash, buf, size);
00435
00436 this->Restart();
00437 }
00438 }
00439
00440 template <class B>
00441 void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
00442 {
00443 assert(length==32);
00444 memcpy(m_key, key, 32);
00445 }
00446
00447 template <class B>
00448 void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
00449 {
00450 assert(length==32);
00451 this->Reset();
00452 this->Iterate(1, m_key);
00453 if (iv && IsAligned<word32>(iv))
00454 this->Iterate(1, (const word32 *)iv);
00455 else
00456 {
00457 FixedSizeSecBlock<word32, 8> buf;
00458 if (iv)
00459 memcpy(buf, iv, 32);
00460 else
00461 memset(buf, 0, 32);
00462 this->Iterate(1, buf);
00463 }
00464
00465 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
00466 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2() && !IsP4())
00467 Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
00468 else
00469 #endif
00470 this->Iterate(32);
00471 }
00472
00473 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
00474 template <class B>
00475 unsigned int PanamaCipherPolicy<B>::GetAlignment() const
00476 {
00477 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
00478 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
00479 return 16;
00480 else
00481 #endif
00482 return 1;
00483 }
00484 #endif
00485
00486 template <class B>
00487 void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
00488 {
00489 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
00490 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
00491 Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (const word32 *)input);
00492 else
00493 #endif
00494 this->Iterate(iterationCount, NULL, (word32 *)output, (const word32 *)input);
00495 }
00496
// Explicit instantiations: the member templates above are defined in this
// translation unit, so both byte-order variants of each class are
// instantiated here.

// Core state and iteration.
00497 template class Panama<BigEndian>;
00498 template class Panama<LittleEndian>;
00499
// Hash function (in namespace Weak).
00500 template class Weak::PanamaHash<BigEndian>;
00501 template class Weak::PanamaHash<LittleEndian>;
00502
// Stream cipher policy.
00503 template class PanamaCipherPolicy<BigEndian>;
00504 template class PanamaCipherPolicy<LittleEndian>;
00505
00506 NAMESPACE_END
00507
00508 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM