// Build configuration for this translation unit. The lines guarded by each
// conditional, and the matching #endif directives, are not all visible in
// this excerpt.
12 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
// Defining CRYPTOPP_DISABLE_VMAC_ASM switches off the assembly code paths by
// undefining the four ASM-availability macros listed below.
16 #if defined(CRYPTOPP_DISABLE_VMAC_ASM)
17 # undef CRYPTOPP_X86_ASM_AVAILABLE
18 # undef CRYPTOPP_X32_ASM_AVAILABLE
19 # undef CRYPTOPP_X64_ASM_AVAILABLE
20 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
// MSVC builds: suppress warning C4731 (frame pointer register 'ebp' modified
// by inline assembly code).
23 #if CRYPTOPP_MSC_VERSION
24 # pragma warning(disable: 4731)
// Type aliases, the VMAC_BOOL_WORD128 switch and the bit-mask constants,
// all placed between the anonymous-namespace macros.
27 ANONYMOUS_NAMESPACE_BEGIN
// VMAC_BOOL_WORD128 is 1 only when CRYPTOPP_WORD128_AVAILABLE is defined and
// CRYPTOPP_X64_ASM_AVAILABLE is not; the other branch sets it to 0.
// (The #else/#endif lines are not visible in this excerpt.)
29 #if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
30 using CryptoPP::word128;
31 using CryptoPP::word64;
32 # define VMAC_BOOL_WORD128 1
34 using CryptoPP::word64;
35 # define VMAC_BOOL_WORD128 0
// NOTE(review): the preprocessor guard around this workaround is not visible
// here; confirm it is compiler-specific before touching it.
39 #define const // Turbo C++ 2006 workaround
// p64 = 2^64 - 257. m62, m63 and m64 are the all-ones masks of 62, 63 and
// 64 bits. mpoly keeps the low 29 bits of each 32-bit half of a 64-bit word.
41 const word64 p64 = W64LIT(0xfffffffffffffeff);
42 const word64 m62 = W64LIT(0x3fffffffffffffff);
43 const word64 m63 = W64LIT(0x7fffffffffffffff);
44 const word64 m64 = W64LIT(0xffffffffffffffff);
45 const word64 mpoly = W64LIT(0x1fffffff1fffffff);
// m126 is the 126-bit all-ones mask: m62 in the high 64 bits, m64 in the low
// 64 bits. On PowerPC with CRYPTOPP_GCC_VERSION < 50300 it is a macro instead
// of a word128 constant.
52 # if defined(__powerpc__) && defined (CRYPTOPP_GCC_VERSION) && (CRYPTOPP_GCC_VERSION < 50300)
53 # define m126 ((word128(m62)<<64)|m64)
55 const word128 m126 = (word128(m62)<<64)|m64;
59 ANONYMOUS_NAMESPACE_END
// Key-setup logic. NOTE(review): the enclosing function signature and several
// statements are not visible in this excerpt; comments describe only the
// lines shown.
//
// Read the requested digest size from `params`, falling back to
// DefaultDigestSize(). Only 8 and 16 pass the check below (the statement
// executed on failure is not visible).
65 int digestLength = params.GetIntValueWithDefault(
Name::DigestSize(), DefaultDigestSize());
66 if (digestLength != 8 && digestLength != 16)
// m_is128 records whether the 16-byte digest was selected.
68 m_is128 = digestLength == 16;
// Reject an L1 key length that is not a positive multiple of 128.
71 if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
72 throw InvalidArgument(
"VMAC: L1KeyLength must be a positive multiple of 128");
// Key the block cipher, then express its block size in 64-bit words.
77 cipher.
SetKey(userKey, keylength, params);
78 const unsigned int blockSize = cipher.
BlockSize();
79 const unsigned int blockSizeInWords = blockSize /
sizeof(word64);
// Convert the NH key in place (source and destination are both m_nhKey())
// with respect to big-endian order, over m_nhKeySize() 64-bit words.
88 ConditionalByteReverse<word64>(
BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*
sizeof(word64));
// Runs once when m_is128 is false and twice when it is true.
93 for (i = 0; i <= (size_t)m_is128; i++)
104 word64 *l3Key = m_l3Key();
// Same iteration count as above: one L3 key pair per tag half.
107 for (i = 0; i <= (size_t)m_is128; i++)
// Tail of a do/while: repeat until both words of the pair are below p64.
114 }
while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
// Fetch the nonce from `params` and install it through Resynchronize().
118 const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
119 Resynchronize(nonce, (
int)nonceLength);
// Nonce installation logic. NOTE(review): the enclosing function signature,
// the definition of `s`, and the if/else structure are not visible in this
// excerpt; confirm branch boundaries against the full file.
//
// Validate the nonce length via ThrowIfInvalidIVLength and keep the result.
130 size_t length = ThrowIfInvalidIVLength(len);
132 byte *storedNonce = m_nonce();
// Store the nonce right-aligned in an s-byte buffer: zero the first
// s-length bytes, then copy all `length` nonce bytes behind them.
136 memset(storedNonce, 0, s-length);
137 memcpy(storedNonce+s-length, nonce, length);
// Pad-cache check: the last stored byte and the last nonce byte are compared
// with their lowest bit forced to 1, i.e. ignoring that bit.
142 if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
// The cache stays valid only while every leading (padding) byte is zero; the
// loop stops as soon as m_padCached becomes false.
145 for (
size_t i=0; m_padCached && i<s-length; i++)
146 m_padCached = (storedNonce[i] == 0);
// Store the nonce right-aligned again, but write the final byte with its
// lowest bit cleared.
150 memset(storedNonce, 0, s-length);
151 memcpy(storedNonce+s-length, nonce, length-1);
152 storedNonce[s-1] = nonce[length-1] & 0xfe;
// Record the true final nonce byte, lowest bit included.
156 storedNonce[s-1] = nonce[length-1];
// Mark that no block has been hashed yet for this message.
158 m_isFirstBlock =
true;
// Single-block hashing entry point. The visible body only marks `data` as
// unused and throws NotImplemented; the braces and any other statements are
// not shown in this excerpt.
162 void VMAC_Base::HashEndianCorrectedBlock(
const word64 *data)
164 CRYPTOPP_UNUSED(data);
// Signals that this entry point is not supported.
166 throw NotImplemented(
"VMAC: HashEndianCorrectedBlock is not implemented");
// Inline-assembly implementation of the hash update, using the MMX registers
// mm0-mm7. NOTE(review): many instructions and the closing preprocessor
// directives are missing from this excerpt, so the comments below only
// describe lines that are shown.
172 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
178 #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
179 #if CRYPTOPP_MSC_VERSION
180 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
// Hashes `blocksRemainingInWord64` 64-bit words starting at `data`.
// `tagPart` selects the key and state slice: the GCC operand list below
// passes nhK+tagPart*2 and polyS+tagPart*4 to the assembly.
184 void VMAC_Base::VHASH_Update_SSE2(
const word64 *data,
size_t blocksRemainingInWord64,
int tagPart)
186 const word64 *nhK = m_nhKey();
187 word64 *polyS = (word64*)(
void*)m_polyState();
// Mark the parameters as used (presumably because they are otherwise only
// referenced from the assembly text — confirm).
191 CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart);
193 CRYPTOPP_UNUSED(blocksRemainingInWord64);
// AT&T-syntax path: ebx is saved 20 bytes below the stack pointer here and
// reloaded from the same slot after the assembly body.
209 #
if CRYPTOPP_BOOL_X86
213 AS2( mov %%ebx, -20(%%esp))
// Intel compiler: m_isFirstBlock is copied to a local char and loaded into dl
// from there. The other visible lines load m_L1KeyLength into ebx and
// m_isFirstBlock into dl through ecx.
220 #
if defined(__INTEL_COMPILER)
221 char isFirstBlock = m_isFirstBlock;
223 AS2( mov dl, [isFirstBlock])
226 AS2( mov ebx, [ecx+m_L1KeyLength])
227 AS2( mov dl, [ecx+m_isFirstBlock])
229 AS2( mov eax, tagPart)
237 AS2( mov ecx, blocksRemainingInWord64)
// ebp is overwritten here, which is what the 4731 pragma above refers to.
248 AS2( lea ebp, [edi+8*ebp])
// Each 64-bit word loaded from [esi] is added to the word at the same offset
// from [edi] before the 32x32 multiplications.
249 AS2( movq mm6, [esi])
250 AS2( paddq mm6, [edi])
251 AS2( movq mm5, [esi+8])
252 AS2( paddq mm5, [edi+8])
256 ASS( pshufw mm2, mm6, 1, 0, 3, 2)
257 AS2( pmuludq mm6, mm5)
258 ASS( pshufw mm3, mm5, 1, 0, 3, 2)
259 AS2( pmuludq mm5, mm2)
260 AS2( pmuludq mm2, mm3)
261 AS2( pmuludq mm3, mm4)
// [esp], [esp+4] and [esp+8] serve as scratch slots for 32-bit values.
263 AS2( movd [esp], mm6)
265 AS2( movd [esp+4], mm5)
270 AS2( movq mm0, [esi])
271 AS2( paddq mm0, [edi])
272 AS2( movq mm1, [esi+8])
273 AS2( paddq mm1, [edi+8])
278 ASS( pshufw mm2, mm0, 1, 0, 3, 2)
279 AS2( pmuludq mm0, mm1)
280 AS2( movd [esp+8], mm3)
283 ASS( pshufw mm3, mm1, 1, 0, 3, 2)
284 AS2( pmuludq mm1, mm2)
285 AS2( pmuludq mm2, mm3)
286 AS2( pmuludq mm3, mm4)
287 AS2( movd mm4, [esp])
289 AS2( movd mm4, [esp+4])
291 AS2( movd mm4, [esp+8])
293 AS2( movd [esp], mm0)
296 AS2( movd [esp+4], mm1)
303 AS2( movd [esp+8], mm3)
306 AS2( movd mm4, [esp])
308 AS2( movd mm4, [esp+4])
310 AS2( movd mm4, [esp+8])
312 AS2( lea ebp, [8*ebx])
315 AS2( movd [esp], mm7)
318 AS2( movd [esp+4], mm6)
// k0..k3 name four 32-bit words starting 16 bytes past eax. The byte offsets
// are k2 = +0, k3 = +4, k0 = +8 and k1 = +12 relative to that base.
328 #define k0 [eax+2*8+2*4]
329 #define k1 [eax+2*8+3*4]
330 #define k2 [eax+2*8+0*4]
331 #define k3 [eax+2*8+1*4]
336 AS2( movd mm0, [esp])
341 AS2( movd mm2, [esp+4])
354 AS2( pmuludq mm0, k3)
356 AS2( pmuludq mm1, k2)
359 AS2( pmuludq mm2, mm6)
365 AS2( pmuludq mm3, mm7)
366 AS2( pmuludq mm4, mm7)
367 AS2( pmuludq mm5, mm6)
372 AS2( pmuludq mm1, k2)
377 AS2( pmuludq mm2, k3)
378 AS2( pmuludq mm3, mm7)
379 AS2( movd [esp+8], mm0)
381 AS2( pmuludq mm7, mm5)
382 AS2( pmuludq mm5, k3)
385 AS2( pmuludq mm1, k2)
390 AS2( pmuludq mm2, mm6)
391 AS2( pmuludq mm6, a0)
394 AS2( movd mm3, [esp])
397 AS2( pmuludq mm3, k3)
400 AS2( pmuludq mm1, k2)
402 AS2( movd mm2, [esp+4])
409 AS2( movd mm7, [esp+8])
// AT&T-syntax path: restore ebx from the slot written at the start.
439 # if CRYPTOPP_BOOL_X86
441 AS2( mov -20(%%esp), %%ebx)
// GCC extended-asm input operands: L1KeyLength in memory ("m"), the word
// count in ecx ("c"), data in esi ("S"), the NH key slice in edi ("D"),
// m_isFirstBlock in edx ("d") and the polynomial state slice in eax ("a").
444 :
"m" (
L1KeyLength),
"c" (blocksRemainingInWord64),
"S" (data),
445 "D" (nhK+tagPart*2),
"d" (m_isFirstBlock),
"a" (polyS+tagPart*4)
// Arithmetic helper macros, one set per platform. NOTE(review): the
// #else/#endif lines separating the variants are missing from this excerpt.
//
// Native 128-bit variant: DeclareNH makes a zeroed word128 accumulator,
// MUL64 splits a 64x64 product into high (rh) and low (rl) words,
// AccumulateNH adds a 64x64 product to the accumulator, and Multiply128
// multiplies the low 64 bits of its two inputs.
452 #if VMAC_BOOL_WORD128
453 #define DeclareNH(a) word128 a=0
454 #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
455 #define AccumulateNH(a, b, c) a += word128(b)*(c)
456 #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
// MUL32: 32x32 -> 64-bit multiply. Uses the __emulu intrinsic on MSVC 1400+
// (non-Intel) for x86, x64 and IA64 targets; the other definition is a plain
// cast-and-multiply.
458 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
459 #define MUL32(a, b) __emulu(word32(a), word32(b))
461 #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
// x64 inline-assembly variant: the accumulator is two word64 halves, a##0
// (low, receives rax) and a##1 (high, receives rdx plus carry). ADD128 adds
// il to rl and then ih plus carry to rh.
463 #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
464 #define DeclareNH(a) word64 a##0=0, a##1=0
465 #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
466 #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
467 #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
// MSVC variant built on the _umul128 intrinsic. The carry into the high half
// is detected with (a##0 < pl). Part of AccumulateNH is not visible here.
468 #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
469 #define DeclareNH(a) word64 a##0=0, a##1=0
470 #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
471 #define AccumulateNH(a, b, c) {\
473 pl = _umul128(b,c,&ph);\
475 a##1 += ph + (a##0 < pl);}
// Portable 32-bit variant: sets VMAC_BOOL_32BIT to 1, uses three 64-bit
// accumulators per NH value, and builds MUL64 from four MUL32 partial
// products. Several lines of MUL64 and AccumulateNH are missing here.
477 #define VMAC_BOOL_32BIT 1
478 #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
479 #define MUL64(rh,rl,i1,i2) \
480 { word64 _i1 = (i1), _i2 = (i2); \
481 word64 m1= MUL32(_i1,_i2>>32); \
482 word64 m2= MUL32(_i1>>32,_i2); \
483 rh = MUL32(_i1>>32,_i2>>32); \
484 rl = MUL32(_i1,_i2); \
485 ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
486 ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
488 #define AccumulateNH(a, b, c) {\
489 word64 p = MUL32(b, c);\
490 a##1 += word32((p)>>32);\
492 p = MUL32((b)>>32, c);\
493 a##2 += word32((p)>>32);\
495 p = MUL32((b)>>32, (c)>>32);\
497 p = MUL32(b, (c)>>32);\
499 a##2 += word32(p>>32);}
// Default VMAC_BOOL_32BIT to 0 when no variant above defined it.
502 #ifndef VMAC_BOOL_32BIT
503 #define VMAC_BOOL_32BIT 0
// Generic ADD128: the high word receives ih plus a carry detected by
// (rl) < (_il). The statement that updates rl is not visible in this excerpt.
506 #define ADD128(rh,rl,ih,il) \
507 { word64 _il = (il); \
509 (rh) += (ih) + ((rl) < (_il)); \
513 template <
bool T_128BitTag>
// Portable hash update over `blocksRemainingInWord64` 64-bit words of `data`.
// T_128BitTag sets how many tag halves are processed: the per-half loops
// below run for i = 0 only when it is false, and for i = 0 and 1 when true.
// NOTE(review): braces, several statements and most #else/#endif lines are
// missing from this excerpt; comments describe only visible lines.
514 void VMAC_Base::VHASH_Update_Template(
const word64 *data,
size_t blocksRemainingInWord64)
// INNER_LOOP_ITERATION(j): read data words i+2j and i+2j+1 with respect to
// little-endian order, then accumulate into nhA using NH key words i+2j and
// i+2j+1, and into nhB using key words i+2j+2 and i+2j+3. (Any condition
// guarding the nhB step is not visible here.)
519 #define INNER_LOOP_ITERATION(j) {\
520 word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
521 word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
522 AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
524 AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
527 size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
// By default one pass of the inner loop covers a full L1 block.
528 size_t innerLoopEnd = L1KeyLengthInWord64;
529 const word64 *nhK = m_nhKey();
530 word64 *polyS = (word64*)(
void*)m_polyState();
531 bool isFirstBlock =
true;
535 #if VMAC_BOOL_WORD128
538 word64 ah1=0, al1=0, ah2=0, al2=0;
// The polynomial key for tag half n is read from polyS[n*4+2] (high word)
// and polyS[n*4+3] (low word).
540 word64 kh1, kl1, kh2, kl2;
541 kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
544 kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
// Short final block: when fewer words remain than one L1 block, first consume
// the (count % 8) leftover words two at a time, then let the unrolled loop
// run up to the remaining count.
554 if (blocksRemainingInWord64 < L1KeyLengthInWord64)
556 if (blocksRemainingInWord64 % 8)
558 innerLoopEnd = blocksRemainingInWord64 % 8;
559 for (; i<innerLoopEnd; i+=2)
560 INNER_LOOP_ITERATION(0);
562 innerLoopEnd = blocksRemainingInWord64;
// Unrolled NH loop: four iterations, eight data words, per pass.
564 for (; i<innerLoopEnd; i+=8)
566 INNER_LOOP_ITERATION(0);
567 INNER_LOOP_ITERATION(1);
568 INNER_LOOP_ITERATION(2);
569 INNER_LOOP_ITERATION(3);
// Advance past the words just consumed.
571 blocksRemainingInWord64 -= innerLoopEnd;
572 data += innerLoopEnd;
// 32-bit variant: split each three-word NH accumulator into 32-bit pieces,
// propagating carries upward and masking the top piece with m62. Index 0
// holds the nhA result and index 1 the nhB result.
575 word32 nh0[2], nh1[2];
578 nh0[0] = word32(nhA0);
579 nhA1 += (nhA0 >> 32);
580 nh1[0] = word32(nhA1);
581 nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
585 nh0[1] = word32(nhB0);
586 nhB1 += (nhB0 >> 32);
587 nh1[1] = word32(nhB1);
588 nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
// Accessors for the 32-bit pieces of the accumulator (a*) and the key (k*)
// of tag half i. NativeByteOrder::ToEnum() adjusts the index so the same
// piece is picked regardless of host byte order. aHi and kHi are the whole
// high 64-bit words.
591 #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
592 #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
593 #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
594 #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
595 #define aHi ((polyS+i*4)[0])
596 #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
597 #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
598 #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
599 #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
600 #define kHi ((polyS+i*4+2)[0])
// First-block handling: clear both the local flag and the member flag.
604 isFirstBlock =
false;
607 m_isFirstBlock =
false;
// First block, 32-bit variant: add the NH pieces to the key pieces with carry
// propagation, once per tag half.
608 for (i=0; i<=(size_t)T_128BitTag; i++)
610 word64 t = (word64)nh0[i] + k0;
612 t = (t >> 32) + nh1[i] + k1;
614 aHi = (t >> 32) + nh2[i] + kHi;
// Later blocks, 32-bit variant: polynomial step built from 32x32 products,
// once per tag half. Most of its statements are not visible in this excerpt.
619 for (i=0; i<=(size_t)T_128BitTag; i++)
635 t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
639 p += MUL32(a1, 2*k3);
640 p += MUL32(a2, 2*k2);
641 p += MUL32(a3, 2*k1);
647 p += MUL32(a2, 2*k3);
648 p += MUL32(a3, 2*k2);
// 64-bit and 128-bit variants start here.
664 #else // #if VMAC_BOOL_32BIT
667 isFirstBlock =
false;
670 m_isFirstBlock =
false;
// first_poly_step (word128 form): accumulator = (NH result masked to 126
// bits) + the 128-bit key formed from kh and kl.
671 #if VMAC_BOOL_WORD128
672 #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
674 first_poly_step(a1, kh1, kl1, nhA);
676 first_poly_step(a2, kh2, kl2, nhB);
// first_poly_step (two-word form): adds the key to the NH result via ADD128.
// The rest of the macro body is not visible in this excerpt.
678 #define first_poly_step(ah, al, kh, kl, mh, ml) {\
680 ADD128(mh, ml, kh, kl); \
683 first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
685 first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
// Reload the running accumulators from polyS: the high word is at index
// n*4 and the low word at n*4+1 for tag half n.
691 #if VMAC_BOOL_WORD128
692 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
694 ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
698 #if VMAC_BOOL_WORD128
699 a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
701 ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
// poly_step (word128 form): forms the four 64x64 partial products of the
// accumulator and the key; the product of the two high words uses 2*kh.
// The combining steps between the products and the final assignment are not
// visible in this excerpt.
707 #if VMAC_BOOL_WORD128
708 #define poly_step(a, kh, kl, m) \
709 { word128 t1, t2, t3, t4;\
710 Multiply128(t2, a>>64, kl);\
711 Multiply128(t3, a, kh);\
712 Multiply128(t1, a, kl);\
713 Multiply128(t4, a>>64, 2*kh);\
717 a = (word128(word64(t2)&m63) << 64) | word64(t4);\
722 poly_step(a1, kh1, kl1, nhA);
724 poly_step(a2, kh2, kl2, nhB);
// poly_step (two-word form): the same computation expressed with MUL64 and
// ADD128 on explicit high and low words; the message words mh:ml are added
// near the end. Some lines of the macro are not visible in this excerpt.
726 #define poly_step(ah, al, kh, kl, mh, ml) \
727 { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
729 MUL64(t2h,t2l,ah,kl); \
730 MUL64(t3h,t3l,al,kh); \
731 MUL64(t1h,t1l,ah,2*kh); \
732 MUL64(ah,al,al,kl); \
734 ADD128(t2h,t2l,t3h,t3l); \
736 ADD128(ah,al,t1h,t1l); \
739 ADD128(t2h,ah,z,t2l); \
741 t2h += t2h + (ah >> 63); \
745 ADD128(ah,al,mh,ml); \
746 ADD128(ah,al,z,t2h); \
749 poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
751 poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
753 #endif // #if VMAC_BOOL_32BIT
// Tail of the outer do/while: continue until every input word is consumed.
754 }
while (blocksRemainingInWord64);
// Write the running accumulators back into polyS. No write-back is shown for
// the 32-bit variant, whose accessor macros point directly into polyS.
756 #if VMAC_BOOL_WORD128
757 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
760 (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
762 #elif !VMAC_BOOL_32BIT
763 (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
766 (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
// Dispatcher for the hash update: forwards `data` and the word count to
// either the SSE2 assembly routine or the portable template.
// NOTE(review): the conditions choosing between the calls below (runtime
// checks and #else/#endif lines) are not visible in this excerpt.
771 inline void VMAC_Base::VHASH_Update(
const word64 *data,
size_t blocksRemainingInWord64)
773 #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
// SSE2 path: the routine is called with tagPart 0 and with tagPart 1, and
// m_isFirstBlock is cleared afterwards.
776 VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
778 VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
779 m_isFirstBlock =
false;
// Portable path: instantiate the template with or without the 128-bit tag
// (presumably selected by m_is128 — confirm against the full file).
785 VHASH_Update_Template<true>(data, blocksRemainingInWord64);
787 VHASH_Update_Template<false>(data, blocksRemainingInWord64);
// Hashes the largest prefix of `data` whose byte length is a multiple of
// m_L1KeyLength. The return statement is not visible in this excerpt.
791 size_t VMAC_Base::HashMultipleBlocks(
const word64 *data,
size_t length)
// `remaining` is length modulo m_L1KeyLength; ModPowerOf2 is used for the
// reduction (which presumably requires a power-of-two modulus — confirm).
793 size_t remaining =
ModPowerOf2(length, m_L1KeyLength);
// The byte count is divided by 8 to give a count of 64-bit words.
794 VHASH_Update(data, (length-remaining)/8);
// Final hashing stage: reduces the two-word value input[0]:input[1] together
// with `len` and the key pair l3Key[0], l3Key[1] to a single word64.
// NOTE(review): several arithmetic steps and the return statement are missing
// from this excerpt; comments cover visible lines only.
798 word64 L3Hash(
const word64 *input,
const word64 *l3Key,
size_t len)
800 word64 rh, rl, t, z=0;
801 word64 p1 = input[0], p2 = input[1];
802 word64 k1 = l3Key[0], k2 = l3Key[1];
// 128-bit add into p1:p2 (p1 is the high word); the high addend is `len`.
807 ADD128(p1, p2, len, t);
// t becomes 1 when p1:p2 >= 2^127 - 1 (p1 above m63, or p1 == m63 with
// p2 all ones); that 0 or 1 is then added back into p1:p2.
809 t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
810 ADD128(p1, p2, z, t);
// Add 1 when the low 32 bits of t are all ones.
816 t += (word32)t > 0xfffffffeU;
// (0 - cond) & 257 is 257 when cond holds and 0 otherwise. Presumably this
// compensates a wrap-around of a preceding key addition modulo p64 — the
// addition itself is not visible here.
822 p1 += (0 - (p1 < k1)) & 257;
824 p2 += (0 - (p2 < k2)) & 257;
// Full 64x64 -> 128-bit product of the two adjusted words.
827 MUL64(rh, rl, p1, p2);
829 ADD128(t, rl, z, rh);
831 ADD128(t, rl, z, rh);
// Conditional +257 corrections on rl: after rl < t, and after rl > p64-1.
834 rl += (0 - (rl < t)) & 257;
835 rl += (0 - (rl > p64-1)) & 257;
// Finalisation logic. NOTE(review): the enclosing function signature, the
// branch conditions and the declaration of the local array `t` are not
// visible in this excerpt.
//
// len = buffered byte count (GetBitCountLo()/8) modulo m_L1KeyLength.
843 size_t len =
ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
// Zero-pad the buffered data up to the next multiple of 16 bytes, then hash
// it as ((len+15)/16)*2 64-bit words.
847 memset(m_data()+len, 0, (0-len)%16);
848 VHASH_Update(DataBuf(), ((len+15)/16)*2);
// No block was hashed: copy the key words (indices 2,3 and 6,7 of the poly
// state) into the accumulator slots (indices 0,1 and 4,5).
851 else if (m_isFirstBlock)
854 m_polyState()[0] = m_polyState()[2];
855 m_polyState()[1] = m_polyState()[3];
858 m_polyState()[4] = m_polyState()[6];
859 m_polyState()[5] = m_polyState()[7];
// Two-word tag: each half is the L3Hash of its poly-state slice and L3 key
// pair, plus a 64-bit word read from m_pad() in big-endian order (offsets 0
// and 8).
866 t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(
true,
BIG_ENDIAN_ORDER, m_pad());
867 t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(
true,
BIG_ENDIAN_ORDER, m_pad()+8);
// Copy `size` bytes of the computed tag to the caller's buffer.
877 memcpy(mac, t, size);
// One-word tag: only the first poly-state slice and key pair are hashed.
882 word64 t = L3Hash(m_polyState(), m_l3Key(), len);
889 memcpy(mac, &t, size);