31 #include <asm/div64.h>
44 #include "softfloat-macros"
56 #include "softfloat-specialize"
73 flag roundNearestEven;
74 int8 roundIncrement, roundBits;
77 roundingMode = roundData->
mode;
79 roundIncrement = 0x40;
80 if ( ! roundNearestEven ) {
85 roundIncrement = 0x7F;
94 roundBits = absZ & 0x7F;
95 absZ = ( absZ + roundIncrement )>>7;
96 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
99 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
101 return zSign ? 0x80000000 : 0x7FFFFFFF;
116 return a & 0x007FFFFF;
128 return ( a>>23 ) & 0xFF;
159 shiftCount = countLeadingZeros32( aSig ) - 8;
160 *zSigPtr = aSig<<shiftCount;
161 *zExpPtr = 1 - shiftCount;
182 mov %0, %1, asl #31 \n\
183 orr %0, %2, asl #23 \n\
186 :
"g" (f),
"g" (zSign),
"g" (zExp),
"g" (zSig)
190 return ( ( (
bits32) zSign )<<31 ) + ( ( (
bits32) zExp )<<23 ) + zSig;
220 flag roundNearestEven;
221 int8 roundIncrement, roundBits;
224 roundingMode = roundData->
mode;
226 roundIncrement = 0x40;
227 if ( ! roundNearestEven ) {
232 roundIncrement = 0x7F;
241 roundBits = zSig & 0x7F;
242 if ( 0xFD <= (
bits16) zExp ) {
244 || ( ( zExp == 0xFD )
245 && ( (
sbits32) ( zSig + roundIncrement ) < 0 ) )
248 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
254 || ( zSig + roundIncrement < 0x80000000 );
257 roundBits = zSig & 0x7F;
262 zSig = ( zSig + roundIncrement )>>7;
263 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
264 if ( zSig == 0 ) zExp = 0;
284 shiftCount = countLeadingZeros32( zSig ) - 1;
285 return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
297 return a &
LIT64( 0x000FFFFFFFFFFFFF );
309 return ( a>>52 ) & 0x7FF;
340 shiftCount = countLeadingZeros64( aSig ) - 11;
341 *zSigPtr = aSig<<shiftCount;
342 *zExpPtr = 1 - shiftCount;
361 return ( ( (
bits64) zSign )<<63 ) + ( ( (
bits64) zExp )<<52 ) + zSig;
391 flag roundNearestEven;
392 int16 roundIncrement, roundBits;
395 roundingMode = roundData->
mode;
397 roundIncrement = 0x200;
398 if ( ! roundNearestEven ) {
403 roundIncrement = 0x3FF;
412 roundBits = zSig & 0x3FF;
413 if ( 0x7FD <= (
bits16) zExp ) {
414 if ( ( 0x7FD < zExp )
415 || ( ( zExp == 0x7FD )
416 && ( (
sbits64) ( zSig + roundIncrement ) < 0 ) )
421 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
427 || ( zSig + roundIncrement <
LIT64( 0x8000000000000000 ) );
430 roundBits = zSig & 0x3FF;
435 zSig = ( zSig + roundIncrement )>>10;
436 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
437 if ( zSig == 0 ) zExp = 0;
457 shiftCount = countLeadingZeros64( zSig ) - 1;
458 return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
486 return a.high & 0x7FFF;
516 shiftCount = countLeadingZeros64( aSig );
517 *zSigPtr = aSig<<shiftCount;
518 *zExpPtr = 1 - shiftCount;
533 z.high = ( ( (
bits16) zSign )<<15 ) + zExp;
565 roundAndPackFloatx80(
569 int8 roundingMode, roundingPrecision;
570 flag roundNearestEven, increment, isTiny;
571 int64 roundIncrement, roundMask, roundBits;
573 roundingMode = roundData->
mode;
574 roundingPrecision = roundData->
precision;
576 if ( roundingPrecision == 80 )
goto precision80;
577 if ( roundingPrecision == 64 ) {
578 roundIncrement =
LIT64( 0x0000000000000400 );
579 roundMask =
LIT64( 0x00000000000007FF );
581 else if ( roundingPrecision == 32 ) {
582 roundIncrement =
LIT64( 0x0000008000000000 );
583 roundMask =
LIT64( 0x000000FFFFFFFFFF );
588 zSig0 |= ( zSig1 != 0 );
589 if ( ! roundNearestEven ) {
594 roundIncrement = roundMask;
603 roundBits = zSig0 & roundMask;
604 if ( 0x7FFD <= (
bits32) ( zExp - 1 ) ) {
605 if ( ( 0x7FFE < zExp )
606 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
614 || ( zSig0 <= zSig0 + roundIncrement );
617 roundBits = zSig0 & roundMask;
620 zSig0 += roundIncrement;
621 if ( (
sbits64) zSig0 < 0 ) zExp = 1;
622 roundIncrement = roundMask + 1;
623 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
624 roundMask |= roundIncrement;
626 zSig0 &= ~ roundMask;
627 return packFloatx80( zSign, zExp, zSig0 );
631 zSig0 += roundIncrement;
632 if ( zSig0 < roundIncrement ) {
634 zSig0 =
LIT64( 0x8000000000000000 );
636 roundIncrement = roundMask + 1;
637 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
638 roundMask |= roundIncrement;
640 zSig0 &= ~ roundMask;
641 if ( zSig0 == 0 ) zExp = 0;
642 return packFloatx80( zSign, zExp, zSig0 );
644 increment = ( (
sbits64) zSig1 < 0 );
645 if ( ! roundNearestEven ) {
658 if ( 0x7FFD <= (
bits32) ( zExp - 1 ) ) {
659 if ( ( 0x7FFE < zExp )
660 || ( ( zExp == 0x7FFE )
661 && ( zSig0 ==
LIT64( 0xFFFFFFFFFFFFFFFF ) )
672 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
674 return packFloatx80( zSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
681 || ( zSig0 <
LIT64( 0xFFFFFFFFFFFFFFFF ) );
682 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
686 if ( roundNearestEven ) {
687 increment = ( (
sbits64) zSig1 < 0 );
699 zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
700 if ( (
sbits64) zSig0 < 0 ) zExp = 1;
702 return packFloatx80( zSign, zExp, zSig0 );
710 zSig0 =
LIT64( 0x8000000000000000 );
713 zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
717 if ( zSig0 == 0 ) zExp = 0;
720 return packFloatx80( zSign, zExp, zSig0 );
734 normalizeRoundAndPackFloatx80(
745 shiftCount = countLeadingZeros64( zSig0 );
746 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
749 roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
766 if ( a == 0 )
return 0;
767 if ( a == 0x80000000 )
return packFloat32( 1, 0x9E, 0 );
769 return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
787 if ( a == 0 )
return 0;
789 absA = aSign ? - a :
a;
790 shiftCount = countLeadingZeros32( absA ) + 21;
792 return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
806 floatx80 int32_to_floatx80(
int32 a )
813 if ( a == 0 )
return packFloatx80( 0, 0, 0 );
815 absA = zSign ? - a :
a;
816 shiftCount = countLeadingZeros32( absA ) + 32;
818 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
838 int16 aExp, shiftCount;
845 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
846 if ( aExp ) aSig |= 0x00800000;
847 shiftCount = 0xAF - aExp;
851 return roundAndPackInt32( roundData, aSign, zSig );
869 int16 aExp, shiftCount;
876 shiftCount = aExp - 0x9E;
877 if ( 0 <= shiftCount ) {
878 if ( a == 0xCF000000 )
return 0x80000000;
880 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) )
return 0x7FFFFFFF;
883 else if ( aExp <= 0x7E ) {
887 aSig = ( aSig | 0x00800000 )<<8;
888 z = aSig>>( - shiftCount );
889 if ( (
bits32) ( aSig<<( shiftCount & 31 ) ) ) {
892 return aSign ? - z : z;
913 if ( aExp == 0xFF ) {
914 if ( aSig )
return commonNaNToFloat64( float32ToCommonNaN( a ) );
918 if ( aSig == 0 )
return packFloat64( aSign, 0, 0 );
919 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
936 floatx80 float32_to_floatx80(
float32 a )
945 if ( aExp == 0xFF ) {
946 if ( aSig )
return commonNaNToFloatx80( float32ToCommonNaN( a ) );
947 return packFloatx80( aSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
950 if ( aSig == 0 )
return packFloatx80( aSign, 0, 0 );
951 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
954 return packFloatx80( aSign, aExp + 0x3F80, ( (
bits64) aSig )<<40 );
972 bits32 lastBitMask, roundBitsMask;
977 if ( 0x96 <= aExp ) {
979 return propagateFloat32NaN( a, a );
983 roundingMode = roundData->
mode;
984 if ( aExp <= 0x7E ) {
985 if ( (
bits32) ( a<<1 ) == 0 )
return a;
988 switch ( roundingMode ) {
995 return aSign ? 0xBF800000 : 0;
997 return aSign ? 0x80000000 : 0x3F800000;
1002 lastBitMask <<= 0x96 - aExp;
1003 roundBitsMask = lastBitMask - 1;
1006 z += lastBitMask>>1;
1007 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1014 z &= ~ roundBitsMask;
1031 int16 aExp, bExp, zExp;
1039 expDiff = aExp - bExp;
1042 if ( 0 < expDiff ) {
1043 if ( aExp == 0xFF ) {
1044 if ( aSig )
return propagateFloat32NaN( a, b );
1056 else if ( expDiff < 0 ) {
1057 if ( bExp == 0xFF ) {
1058 if ( bSig )
return propagateFloat32NaN( a, b );
1071 if ( aExp == 0xFF ) {
1072 if ( aSig | bSig )
return propagateFloat32NaN( a, b );
1075 if ( aExp == 0 )
return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1076 zSig = 0x40000000 + aSig + bSig;
1081 zSig = ( aSig + bSig )<<1;
1088 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1103 int16 aExp, bExp, zExp;
1111 expDiff = aExp - bExp;
1114 if ( 0 < expDiff )
goto aExpBigger;
1115 if ( expDiff < 0 )
goto bExpBigger;
1116 if ( aExp == 0xFF ) {
1117 if ( aSig | bSig )
return propagateFloat32NaN( a, b );
1119 return float32_default_nan;
1125 if ( bSig < aSig )
goto aBigger;
1126 if ( aSig < bSig )
goto bBigger;
1129 if ( bExp == 0xFF ) {
1130 if ( bSig )
return propagateFloat32NaN( a, b );
1145 goto normalizeRoundAndPack;
1147 if ( aExp == 0xFF ) {
1148 if ( aSig )
return propagateFloat32NaN( a, b );
1162 normalizeRoundAndPack:
1164 return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
1181 if ( aSign == bSign ) {
1182 return addFloat32Sigs( roundData, a, b, aSign );
1185 return subFloat32Sigs( roundData, a, b, aSign );
1203 if ( aSign == bSign ) {
1204 return subFloat32Sigs( roundData, a, b, aSign );
1207 return addFloat32Sigs( roundData, a, b, aSign );
1221 flag aSign, bSign, zSign;
1222 int16 aExp, bExp, zExp;
1233 zSign = aSign ^ bSign;
1234 if ( aExp == 0xFF ) {
1235 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1236 return propagateFloat32NaN( a, b );
1238 if ( ( bExp | bSig ) == 0 ) {
1240 return float32_default_nan;
1244 if ( bExp == 0xFF ) {
1245 if ( bSig )
return propagateFloat32NaN( a, b );
1246 if ( ( aExp | aSig ) == 0 ) {
1248 return float32_default_nan;
1253 if ( aSig == 0 )
return packFloat32( zSign, 0, 0 );
1254 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1257 if ( bSig == 0 )
return packFloat32( zSign, 0, 0 );
1258 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1260 zExp = aExp + bExp - 0x7F;
1261 aSig = ( aSig | 0x00800000 )<<7;
1262 bSig = ( bSig | 0x00800000 )<<8;
1265 if ( 0 <= (
sbits32) ( zSig<<1 ) ) {
1269 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1282 flag aSign, bSign, zSign;
1283 int16 aExp, bExp, zExp;
1292 zSign = aSign ^ bSign;
1293 if ( aExp == 0xFF ) {
1294 if ( aSig )
return propagateFloat32NaN( a, b );
1295 if ( bExp == 0xFF ) {
1296 if ( bSig )
return propagateFloat32NaN( a, b );
1298 return float32_default_nan;
1302 if ( bExp == 0xFF ) {
1303 if ( bSig )
return propagateFloat32NaN( a, b );
1308 if ( ( aExp | aSig ) == 0 ) {
1310 return float32_default_nan;
1315 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1318 if ( aSig == 0 )
return packFloat32( zSign, 0, 0 );
1319 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1321 zExp = aExp - bExp + 0x7D;
1322 aSig = ( aSig | 0x00800000 )<<7;
1323 bSig = ( bSig | 0x00800000 )<<8;
1324 if ( bSig <= ( aSig + aSig ) ) {
1333 if ( ( zSig & 0x3F ) == 0 ) {
1334 zSig |= ( ( (
bits64) bSig ) * zSig != ( (
bits64) aSig )<<32 );
1336 return roundAndPackFloat32( roundData, zSign, zExp, zSig );
1349 flag aSign, bSign, zSign;
1350 int16 aExp, bExp, expDiff;
1353 bits64 aSig64, bSig64, q64;
1363 if ( aExp == 0xFF ) {
1364 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1365 return propagateFloat32NaN( a, b );
1368 return float32_default_nan;
1370 if ( bExp == 0xFF ) {
1371 if ( bSig )
return propagateFloat32NaN( a, b );
1377 return float32_default_nan;
1379 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1382 if ( aSig == 0 )
return a;
1383 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1385 expDiff = aExp - bExp;
1388 if ( expDiff < 32 ) {
1391 if ( expDiff < 0 ) {
1392 if ( expDiff < -1 )
return a;
1395 q = ( bSig <= aSig );
1396 if ( q ) aSig -= bSig;
1397 if ( 0 < expDiff ) {
1403 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1411 if ( bSig <= aSig ) aSig -= bSig;
1412 aSig64 = ( (
bits64) aSig )<<40;
1413 bSig64 = ( (
bits64) bSig )<<40;
1415 while ( 0 < expDiff ) {
1416 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1417 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1418 aSig64 = - ( ( bSig * q64 )<<38 );
1422 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1423 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1424 q = q64>>( 64 - expDiff );
1426 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1429 alternateASig = aSig;
1432 }
while ( 0 <= (
sbits32) aSig );
1433 sigMean = aSig + alternateASig;
1434 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1435 aSig = alternateASig;
1437 zSign = ( (
sbits32) aSig < 0 );
1438 if ( zSign ) aSig = - aSig;
1439 return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
1460 if ( aExp == 0xFF ) {
1461 if ( aSig )
return propagateFloat32NaN( a, 0 );
1462 if ( ! aSign )
return a;
1464 return float32_default_nan;
1467 if ( ( aExp | aSig ) == 0 )
return a;
1469 return float32_default_nan;
1472 if ( aSig == 0 )
return 0;
1473 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1475 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1476 aSig = ( aSig | 0x00800000 )<<8;
1477 zSig = estimateSqrt32( aExp, aSig ) + 2;
1478 if ( ( zSig & 0x7F ) <= 5 ) {
1484 term = ( (
bits64) zSig ) * zSig;
1485 rem = ( ( (
bits64) aSig )<<32 ) - term;
1488 rem += ( ( (
bits64) zSig )<<1 ) | 1;
1490 zSig |= ( rem != 0 );
1494 return roundAndPackFloat32( roundData, 0, zExp, zSig );
1516 return ( a == b ) || ( (
bits32) ( ( a | b )<<1 ) == 0 );
1540 if ( aSign != bSign )
return aSign || ( (
bits32) ( ( a | b )<<1 ) == 0 );
1541 return ( a == b ) || ( aSign ^ ( a <
b ) );
1564 if ( aSign != bSign )
return aSign && ( (
bits32) ( ( a | b )<<1 ) != 0 );
1565 return ( a != b ) && ( aSign ^ ( a <
b ) );
1586 return ( a == b ) || ( (
bits32) ( ( a | b )<<1 ) == 0 );
1611 if ( aSign != bSign )
return aSign || ( (
bits32) ( ( a | b )<<1 ) == 0 );
1612 return ( a == b ) || ( aSign ^ ( a <
b ) );
1636 if ( aSign != bSign )
return aSign && ( (
bits32) ( ( a | b )<<1 ) != 0 );
1637 return ( a != b ) && ( aSign ^ ( a <
b ) );
1655 int16 aExp, shiftCount;
1661 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1662 if ( aExp ) aSig |=
LIT64( 0x0010000000000000 );
1663 shiftCount = 0x42C - aExp;
1665 return roundAndPackInt32( roundData, aSign, aSig );
1683 int16 aExp, shiftCount;
1690 shiftCount = 0x433 - aExp;
1691 if ( shiftCount < 21 ) {
1692 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1695 else if ( 52 < shiftCount ) {
1699 aSig |=
LIT64( 0x0010000000000000 );
1701 aSig >>= shiftCount;
1703 if ( aSign ) z = - z;
1704 if ( ( z < 0 ) ^ aSign ) {
1707 return aSign ? 0x80000000 : 0x7FFFFFFF;
1709 if ( ( aSig<<shiftCount ) != savedASig ) {
1730 int16 aExp, shiftCount;
1737 if ( aExp ) aSig |=
LIT64( 0x0010000000000000 );
1738 shiftCount = 0x42C - aExp;
1740 return roundAndPackInt32( roundData, aSign, aSig );
1756 int16 aExp, shiftCount;
1763 shiftCount = 0x433 - aExp;
1764 if ( shiftCount < 21 ) {
1765 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1768 else if ( 52 < shiftCount ) {
1772 aSig |=
LIT64( 0x0010000000000000 );
1774 aSig >>= shiftCount;
1776 if ( aSign ) z = - z;
1777 if ( ( z < 0 ) ^ aSign ) {
1780 return aSign ? 0x80000000 : 0x7FFFFFFF;
1782 if ( ( aSig<<shiftCount ) != savedASig ) {
1806 if ( aExp == 0x7FF ) {
1807 if ( aSig )
return commonNaNToFloat32( float64ToCommonNaN( a ) );
1812 if ( aExp || zSig ) {
1816 return roundAndPackFloat32( roundData, aSign, aExp, zSig );
1830 floatx80 float64_to_floatx80(
float64 a )
1839 if ( aExp == 0x7FF ) {
1840 if ( aSig )
return commonNaNToFloatx80( float64ToCommonNaN( a ) );
1841 return packFloatx80( aSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
1844 if ( aSig == 0 )
return packFloatx80( aSign, 0, 0 );
1845 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1849 aSign, aExp + 0x3C00, ( aSig |
LIT64( 0x0010000000000000 ) )<<11 );
1867 bits64 lastBitMask, roundBitsMask;
1872 if ( 0x433 <= aExp ) {
1874 return propagateFloat64NaN( a, a );
1878 if ( aExp <= 0x3FE ) {
1879 if ( (
bits64) ( a<<1 ) == 0 )
return a;
1882 switch ( roundData->
mode ) {
1889 return aSign ?
LIT64( 0xBFF0000000000000 ) : 0;
1892 aSign ?
LIT64( 0x8000000000000000 ) :
LIT64( 0x3FF0000000000000 );
1897 lastBitMask <<= 0x433 - aExp;
1898 roundBitsMask = lastBitMask - 1;
1900 roundingMode = roundData->
mode;
1902 z += lastBitMask>>1;
1903 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1910 z &= ~ roundBitsMask;
1927 int16 aExp, bExp, zExp;
1935 expDiff = aExp - bExp;
1938 if ( 0 < expDiff ) {
1939 if ( aExp == 0x7FF ) {
1940 if ( aSig )
return propagateFloat64NaN( a, b );
1947 bSig |=
LIT64( 0x2000000000000000 );
1952 else if ( expDiff < 0 ) {
1953 if ( bExp == 0x7FF ) {
1954 if ( bSig )
return propagateFloat64NaN( a, b );
1961 aSig |=
LIT64( 0x2000000000000000 );
1967 if ( aExp == 0x7FF ) {
1968 if ( aSig | bSig )
return propagateFloat64NaN( a, b );
1971 if ( aExp == 0 )
return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
1972 zSig =
LIT64( 0x4000000000000000 ) + aSig + bSig;
1976 aSig |=
LIT64( 0x2000000000000000 );
1977 zSig = ( aSig + bSig )<<1;
1984 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
1999 int16 aExp, bExp, zExp;
2007 expDiff = aExp - bExp;
2010 if ( 0 < expDiff )
goto aExpBigger;
2011 if ( expDiff < 0 )
goto bExpBigger;
2012 if ( aExp == 0x7FF ) {
2013 if ( aSig | bSig )
return propagateFloat64NaN( a, b );
2015 return float64_default_nan;
2021 if ( bSig < aSig )
goto aBigger;
2022 if ( aSig < bSig )
goto bBigger;
2025 if ( bExp == 0x7FF ) {
2026 if ( bSig )
return propagateFloat64NaN( a, b );
2033 aSig |=
LIT64( 0x4000000000000000 );
2036 bSig |=
LIT64( 0x4000000000000000 );
2041 goto normalizeRoundAndPack;
2043 if ( aExp == 0x7FF ) {
2044 if ( aSig )
return propagateFloat64NaN( a, b );
2051 bSig |=
LIT64( 0x4000000000000000 );
2054 aSig |=
LIT64( 0x4000000000000000 );
2058 normalizeRoundAndPack:
2060 return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
2077 if ( aSign == bSign ) {
2078 return addFloat64Sigs( roundData, a, b, aSign );
2081 return subFloat64Sigs( roundData, a, b, aSign );
2099 if ( aSign == bSign ) {
2100 return subFloat64Sigs( roundData, a, b, aSign );
2103 return addFloat64Sigs( roundData, a, b, aSign );
2117 flag aSign, bSign, zSign;
2118 int16 aExp, bExp, zExp;
2119 bits64 aSig, bSig, zSig0, zSig1;
2127 zSign = aSign ^ bSign;
2128 if ( aExp == 0x7FF ) {
2129 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2130 return propagateFloat64NaN( a, b );
2132 if ( ( bExp | bSig ) == 0 ) {
2134 return float64_default_nan;
2138 if ( bExp == 0x7FF ) {
2139 if ( bSig )
return propagateFloat64NaN( a, b );
2140 if ( ( aExp | aSig ) == 0 ) {
2142 return float64_default_nan;
2147 if ( aSig == 0 )
return packFloat64( zSign, 0, 0 );
2148 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2151 if ( bSig == 0 )
return packFloat64( zSign, 0, 0 );
2152 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2154 zExp = aExp + bExp - 0x3FF;
2155 aSig = ( aSig |
LIT64( 0x0010000000000000 ) )<<10;
2156 bSig = ( bSig |
LIT64( 0x0010000000000000 ) )<<11;
2158 zSig0 |= ( zSig1 != 0 );
2159 if ( 0 <= (
sbits64) ( zSig0<<1 ) ) {
2163 return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
2176 flag aSign, bSign, zSign;
2177 int16 aExp, bExp, zExp;
2188 zSign = aSign ^ bSign;
2189 if ( aExp == 0x7FF ) {
2190 if ( aSig )
return propagateFloat64NaN( a, b );
2191 if ( bExp == 0x7FF ) {
2192 if ( bSig )
return propagateFloat64NaN( a, b );
2194 return float64_default_nan;
2198 if ( bExp == 0x7FF ) {
2199 if ( bSig )
return propagateFloat64NaN( a, b );
2204 if ( ( aExp | aSig ) == 0 ) {
2206 return float64_default_nan;
2211 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2214 if ( aSig == 0 )
return packFloat64( zSign, 0, 0 );
2215 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2217 zExp = aExp - bExp + 0x3FD;
2218 aSig = ( aSig |
LIT64( 0x0010000000000000 ) )<<10;
2219 bSig = ( bSig |
LIT64( 0x0010000000000000 ) )<<11;
2220 if ( bSig <= ( aSig + aSig ) ) {
2224 zSig = estimateDiv128To64( aSig, 0, bSig );
2225 if ( ( zSig & 0x1FF ) <= 2 ) {
2227 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2228 while ( (
sbits64) rem0 < 0 ) {
2230 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2232 zSig |= ( rem1 != 0 );
2234 return roundAndPackFloat64( roundData, zSign, zExp, zSig );
2247 flag aSign, bSign, zSign;
2248 int16 aExp, bExp, expDiff;
2259 if ( aExp == 0x7FF ) {
2260 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2261 return propagateFloat64NaN( a, b );
2264 return float64_default_nan;
2266 if ( bExp == 0x7FF ) {
2267 if ( bSig )
return propagateFloat64NaN( a, b );
2273 return float64_default_nan;
2275 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2278 if ( aSig == 0 )
return a;
2279 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2281 expDiff = aExp - bExp;
2282 aSig = ( aSig |
LIT64( 0x0010000000000000 ) )<<11;
2283 bSig = ( bSig |
LIT64( 0x0010000000000000 ) )<<11;
2284 if ( expDiff < 0 ) {
2285 if ( expDiff < -1 )
return a;
2288 q = ( bSig <= aSig );
2289 if ( q ) aSig -= bSig;
2291 while ( 0 < expDiff ) {
2292 q = estimateDiv128To64( aSig, 0, bSig );
2293 q = ( 2 <
q ) ? q - 2 : 0;
2294 aSig = - ( ( bSig>>2 ) * q );
2298 if ( 0 < expDiff ) {
2299 q = estimateDiv128To64( aSig, 0, bSig );
2300 q = ( 2 <
q ) ? q - 2 : 0;
2303 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2310 alternateASig = aSig;
2313 }
while ( 0 <= (
sbits64) aSig );
2314 sigMean = aSig + alternateASig;
2315 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2316 aSig = alternateASig;
2318 zSign = ( (
sbits64) aSig < 0 );
2319 if ( zSign ) aSig = - aSig;
2320 return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
2336 bits64 rem0, rem1, term0, term1;
2342 if ( aExp == 0x7FF ) {
2343 if ( aSig )
return propagateFloat64NaN( a, a );
2344 if ( ! aSign )
return a;
2346 return float64_default_nan;
2349 if ( ( aExp | aSig ) == 0 )
return a;
2351 return float64_default_nan;
2354 if ( aSig == 0 )
return 0;
2355 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2357 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2358 aSig |=
LIT64( 0x0010000000000000 );
2359 zSig = estimateSqrt32( aExp, aSig>>21 );
2361 aSig <<= 9 - ( aExp & 1 );
2362 zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
2363 if ( ( zSig & 0x3FF ) <= 5 ) {
2365 zSig =
LIT64( 0xFFFFFFFFFFFFFFFF );
2370 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2371 while ( (
sbits64) rem0 < 0 ) {
2373 shortShift128Left( 0, zSig, 1, &term0, &term1 );
2375 add128( rem0, rem1, term0, term1, &rem0, &rem1 );
2377 zSig |= ( ( rem0 | rem1 ) != 0 );
2381 return roundAndPackFloat64( roundData, 0, zExp, zSig );
2403 return ( a == b ) || ( (
bits64) ( ( a | b )<<1 ) == 0 );
2427 if ( aSign != bSign )
return aSign || ( (
bits64) ( ( a | b )<<1 ) == 0 );
2428 return ( a == b ) || ( aSign ^ ( a <
b ) );
2451 if ( aSign != bSign )
return aSign && ( (
bits64) ( ( a | b )<<1 ) != 0 );
2452 return ( a != b ) && ( aSign ^ ( a <
b ) );
2473 return ( a == b ) || ( (
bits64) ( ( a | b )<<1 ) == 0 );
2498 if ( aSign != bSign )
return aSign || ( (
bits64) ( ( a | b )<<1 ) == 0 );
2499 return ( a == b ) || ( aSign ^ ( a <
b ) );
2523 if ( aSign != bSign )
return aSign && ( (
bits64) ( ( a | b )<<1 ) != 0 );
2524 return ( a != b ) && ( aSign ^ ( a <
b ) );
2544 int32 aExp, shiftCount;
2547 aSig = extractFloatx80Frac( a );
2548 aExp = extractFloatx80Exp( a );
2549 aSign = extractFloatx80Sign( a );
2550 if ( ( aExp == 0x7FFF ) && (
bits64) ( aSig<<1 ) ) aSign = 0;
2551 shiftCount = 0x4037 - aExp;
2552 if ( shiftCount <= 0 ) shiftCount = 1;
2554 return roundAndPackInt32( roundData, aSign, aSig );
2569 int32 floatx80_to_int32_round_to_zero( floatx80 a )
2572 int32 aExp, shiftCount;
2576 aSig = extractFloatx80Frac( a );
2577 aExp = extractFloatx80Exp( a );
2578 aSign = extractFloatx80Sign( a );
2579 shiftCount = 0x403E - aExp;
2580 if ( shiftCount < 32 ) {
2581 if ( ( aExp == 0x7FFF ) && (
bits64) ( aSig<<1 ) ) aSign = 0;
2584 else if ( 63 < shiftCount ) {
2589 aSig >>= shiftCount;
2591 if ( aSign ) z = - z;
2592 if ( ( z < 0 ) ^ aSign ) {
2595 return aSign ? 0x80000000 : 0x7FFFFFFF;
2597 if ( ( aSig<<shiftCount ) != savedASig ) {
2618 aSig = extractFloatx80Frac( a );
2619 aExp = extractFloatx80Exp( a );
2620 aSign = extractFloatx80Sign( a );
2621 if ( aExp == 0x7FFF ) {
2622 if ( (
bits64) ( aSig<<1 ) ) {
2623 return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
2628 if ( aExp || aSig ) aExp -= 0x3F81;
2629 return roundAndPackFloat32( roundData, aSign, aExp, aSig );
2647 aSig = extractFloatx80Frac( a );
2648 aExp = extractFloatx80Exp( a );
2649 aSign = extractFloatx80Sign( a );
2650 if ( aExp == 0x7FFF ) {
2651 if ( (
bits64) ( aSig<<1 ) ) {
2652 return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
2657 if ( aExp || aSig ) aExp -= 0x3C01;
2658 return roundAndPackFloat64( roundData, aSign, aExp, zSig );
2670 floatx80 floatx80_round_to_int(
struct roundingData *roundData, floatx80 a )
2674 bits64 lastBitMask, roundBitsMask;
2678 aExp = extractFloatx80Exp( a );
2679 if ( 0x403E <= aExp ) {
2680 if ( ( aExp == 0x7FFF ) && (
bits64) ( extractFloatx80Frac( a )<<1 ) ) {
2681 return propagateFloatx80NaN( a, a );
2685 if ( aExp <= 0x3FFE ) {
2687 && ( (
bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2691 aSign = extractFloatx80Sign( a );
2692 switch ( roundData->
mode ) {
2694 if ( ( aExp == 0x3FFE ) && (
bits64) ( extractFloatx80Frac( a )<<1 )
2697 packFloatx80( aSign, 0x3FFF,
LIT64( 0x8000000000000000 ) );
2703 packFloatx80( 1, 0x3FFF,
LIT64( 0x8000000000000000 ) )
2704 : packFloatx80( 0, 0, 0 );
2707 aSign ? packFloatx80( 1, 0, 0 )
2708 : packFloatx80( 0, 0x3FFF,
LIT64( 0x8000000000000000 ) );
2710 return packFloatx80( aSign, 0, 0 );
2713 lastBitMask <<= 0x403E - aExp;
2714 roundBitsMask = lastBitMask - 1;
2716 roundingMode = roundData->
mode;
2718 z.low += lastBitMask>>1;
2719 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
2722 if ( extractFloatx80Sign( z ) ^ ( roundingMode ==
float_round_up ) ) {
2723 z.low += roundBitsMask;
2726 z.low &= ~ roundBitsMask;
2729 z.low =
LIT64( 0x8000000000000000 );
2745 static floatx80 addFloatx80Sigs(
struct roundingData *roundData, floatx80 a, floatx80 b,
flag zSign )
2747 int32 aExp, bExp, zExp;
2748 bits64 aSig, bSig, zSig0, zSig1;
2751 aSig = extractFloatx80Frac( a );
2752 aExp = extractFloatx80Exp( a );
2753 bSig = extractFloatx80Frac( b );
2754 bExp = extractFloatx80Exp( b );
2755 expDiff = aExp - bExp;
2756 if ( 0 < expDiff ) {
2757 if ( aExp == 0x7FFF ) {
2758 if ( (
bits64) ( aSig<<1 ) )
return propagateFloatx80NaN( a, b );
2761 if ( bExp == 0 ) --expDiff;
2762 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2765 else if ( expDiff < 0 ) {
2766 if ( bExp == 0x7FFF ) {
2767 if ( (
bits64) ( bSig<<1 ) )
return propagateFloatx80NaN( a, b );
2768 return packFloatx80( zSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
2770 if ( aExp == 0 ) ++expDiff;
2771 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2775 if ( aExp == 0x7FFF ) {
2776 if ( (
bits64) ( ( aSig | bSig )<<1 ) ) {
2777 return propagateFloatx80NaN( a, b );
2782 zSig0 = aSig + bSig;
2784 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2791 zSig0 = aSig + bSig;
2793 if ( (
sbits64) zSig0 < 0 )
goto roundAndPack;
2795 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2796 zSig0 |=
LIT64( 0x8000000000000000 );
2800 roundAndPackFloatx80(
2801 roundData, zSign, zExp, zSig0, zSig1 );
2814 static floatx80 subFloatx80Sigs(
struct roundingData *roundData, floatx80 a, floatx80 b,
flag zSign )
2816 int32 aExp, bExp, zExp;
2817 bits64 aSig, bSig, zSig0, zSig1;
2821 aSig = extractFloatx80Frac( a );
2822 aExp = extractFloatx80Exp( a );
2823 bSig = extractFloatx80Frac( b );
2824 bExp = extractFloatx80Exp( b );
2825 expDiff = aExp - bExp;
2826 if ( 0 < expDiff )
goto aExpBigger;
2827 if ( expDiff < 0 )
goto bExpBigger;
2828 if ( aExp == 0x7FFF ) {
2829 if ( (
bits64) ( ( aSig | bSig )<<1 ) ) {
2830 return propagateFloatx80NaN( a, b );
2833 z.low = floatx80_default_nan_low;
2834 z.high = floatx80_default_nan_high;
2843 if ( bSig < aSig )
goto aBigger;
2844 if ( aSig < bSig )
goto bBigger;
2847 if ( bExp == 0x7FFF ) {
2848 if ( (
bits64) ( bSig<<1 ) )
return propagateFloatx80NaN( a, b );
2849 return packFloatx80( zSign ^ 1, 0x7FFF,
LIT64( 0x8000000000000000 ) );
2851 if ( aExp == 0 ) ++expDiff;
2852 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2854 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2857 goto normalizeRoundAndPack;
2859 if ( aExp == 0x7FFF ) {
2860 if ( (
bits64) ( aSig<<1 ) )
return propagateFloatx80NaN( a, b );
2863 if ( bExp == 0 ) --expDiff;
2864 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2866 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2868 normalizeRoundAndPack:
2870 normalizeRoundAndPackFloatx80(
2871 roundData, zSign, zExp, zSig0, zSig1 );
2882 floatx80 floatx80_add(
struct roundingData *roundData, floatx80 a, floatx80 b )
2886 aSign = extractFloatx80Sign( a );
2887 bSign = extractFloatx80Sign( b );
2888 if ( aSign == bSign ) {
2889 return addFloatx80Sigs( roundData, a, b, aSign );
2892 return subFloatx80Sigs( roundData, a, b, aSign );
2904 floatx80 floatx80_sub(
struct roundingData *roundData, floatx80 a, floatx80 b )
2908 aSign = extractFloatx80Sign( a );
2909 bSign = extractFloatx80Sign( b );
2910 if ( aSign == bSign ) {
2911 return subFloatx80Sigs( roundData, a, b, aSign );
2914 return addFloatx80Sigs( roundData, a, b, aSign );
2926 floatx80 floatx80_mul(
struct roundingData *roundData, floatx80 a, floatx80 b )
2928 flag aSign, bSign, zSign;
2929 int32 aExp, bExp, zExp;
2930 bits64 aSig, bSig, zSig0, zSig1;
2933 aSig = extractFloatx80Frac( a );
2934 aExp = extractFloatx80Exp( a );
2935 aSign = extractFloatx80Sign( a );
2936 bSig = extractFloatx80Frac( b );
2937 bExp = extractFloatx80Exp( b );
2938 bSign = extractFloatx80Sign( b );
2939 zSign = aSign ^ bSign;
2940 if ( aExp == 0x7FFF ) {
2941 if ( (
bits64) ( aSig<<1 )
2942 || ( ( bExp == 0x7FFF ) && (
bits64) ( bSig<<1 ) ) ) {
2943 return propagateFloatx80NaN( a, b );
2945 if ( ( bExp | bSig ) == 0 )
goto invalid;
2946 return packFloatx80( zSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
2948 if ( bExp == 0x7FFF ) {
2949 if ( (
bits64) ( bSig<<1 ) )
return propagateFloatx80NaN( a, b );
2950 if ( ( aExp | aSig ) == 0 ) {
2953 z.low = floatx80_default_nan_low;
2954 z.high = floatx80_default_nan_high;
2958 return packFloatx80( zSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
2961 if ( aSig == 0 )
return packFloatx80( zSign, 0, 0 );
2962 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2965 if ( bSig == 0 )
return packFloatx80( zSign, 0, 0 );
2966 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2968 zExp = aExp + bExp - 0x3FFE;
2971 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2975 roundAndPackFloatx80(
2976 roundData, zSign, zExp, zSig0, zSig1 );
2987 floatx80 floatx80_div(
struct roundingData *roundData, floatx80 a, floatx80 b )
2989 flag aSign, bSign, zSign;
2990 int32 aExp, bExp, zExp;
2991 bits64 aSig, bSig, zSig0, zSig1;
2992 bits64 rem0, rem1, rem2, term0, term1, term2;
2995 aSig = extractFloatx80Frac( a );
2996 aExp = extractFloatx80Exp( a );
2997 aSign = extractFloatx80Sign( a );
2998 bSig = extractFloatx80Frac( b );
2999 bExp = extractFloatx80Exp( b );
3000 bSign = extractFloatx80Sign( b );
3001 zSign = aSign ^ bSign;
3002 if ( aExp == 0x7FFF ) {
3003 if ( (
bits64) ( aSig<<1 ) )
return propagateFloatx80NaN( a, b );
3004 if ( bExp == 0x7FFF ) {
3005 if ( (
bits64) ( bSig<<1 ) )
return propagateFloatx80NaN( a, b );
3008 return packFloatx80( zSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
3010 if ( bExp == 0x7FFF ) {
3011 if ( (
bits64) ( bSig<<1 ) )
return propagateFloatx80NaN( a, b );
3012 return packFloatx80( zSign, 0, 0 );
3016 if ( ( aExp | aSig ) == 0 ) {
3019 z.low = floatx80_default_nan_low;
3020 z.high = floatx80_default_nan_high;
3025 return packFloatx80( zSign, 0x7FFF,
LIT64( 0x8000000000000000 ) );
3027 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3030 if ( aSig == 0 )
return packFloatx80( zSign, 0, 0 );
3031 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3033 zExp = aExp - bExp + 0x3FFE;
3035 if ( bSig <= aSig ) {
3036 shift128Right( aSig, 0, 1, &aSig, &rem1 );
3039 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3041 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3042 while ( (
sbits64) rem0 < 0 ) {
3044 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3046 zSig1 = estimateDiv128To64( rem1, 0, bSig );
3047 if ( (
bits64) ( zSig1<<1 ) <= 8 ) {
3049 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3050 while ( (
sbits64) rem1 < 0 ) {
3052 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3054 zSig1 |= ( ( rem1 | rem2 ) != 0 );
3057 roundAndPackFloatx80(
3058 roundData, zSign, zExp, zSig0, zSig1 );
/*
-------------------------------------------------------------------------------
Returns the remainder of the extended double-precision floating-point value
`a' with respect to the corresponding value `b' (IEEE remainder: the final
result is the candidate closest to zero, ties to even quotient).
NOTE(review): this block appears garbled by extraction -- interior lines
(mul64To128 calls feeding term0/term1, expDiff updates inside the loop,
the zSign/alternate selection tail, and several closing braces) are missing.
Compare against the original SoftFloat source before relying on the exact
statement sequence shown here.
-------------------------------------------------------------------------------
*/
3069 floatx80 floatx80_rem(
struct roundingData *roundData, floatx80 a, floatx80 b )
3071 flag aSign, bSign, zSign;
3072 int32 aExp, bExp, expDiff;
3073 bits64 aSig0, aSig1, bSig;
3074 bits64 q, term0, term1, alternateASig0, alternateASig1;
/* Unpack both operands. */
3077 aSig0 = extractFloatx80Frac( a );
3078 aExp = extractFloatx80Exp( a );
3079 aSign = extractFloatx80Sign( a );
3080 bSig = extractFloatx80Frac( b );
3081 bExp = extractFloatx80Exp( b );
3082 bSign = extractFloatx80Sign( b );
/* NaN operands propagate; Inf rem anything is handled below. */
3083 if ( aExp == 0x7FFF ) {
3084 if ( (
bits64) ( aSig0<<1 )
3085 || ( ( bExp == 0x7FFF ) && (
bits64) ( bSig<<1 ) ) ) {
3086 return propagateFloatx80NaN( a, b );
3090 if ( bExp == 0x7FFF ) {
3091 if ( (
bits64) ( bSig<<1 ) )
return propagateFloatx80NaN( a, b );
/* Invalid case (e.g. remainder by zero): default NaN.  NOTE(review): the
   enclosing condition and exception flagging appear to be missing from
   this extraction. */
3098 z.low = floatx80_default_nan_low;
3099 z.high = floatx80_default_nan_high;
/* Normalize subnormal operands. */
3103 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3106 if ( (
bits64) ( aSig0<<1 ) == 0 )
return a;
3107 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
/* Force the explicit integer bit of bSig. */
3109 bSig |=
LIT64( 0x8000000000000000 );
3111 expDiff = aExp - bExp;
/* If `a' is much smaller than `b', `a' is already the remainder. */
3113 if ( expDiff < 0 ) {
3114 if ( expDiff < -1 )
return a;
3115 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
/* First quotient bit. */
3118 q = ( bSig <= aSig0 );
3119 if ( q ) aSig0 -= bSig;
/* Reduce the exponent gap 62 bits at a time, each pass subtracting a
   deliberately-conservative quotient estimate (q - 2, never negative)
   times bSig from the running remainder. */
3121 while ( 0 < expDiff ) {
3122 q = estimateDiv128To64( aSig0, aSig1, bSig );
3123 q = ( 2 <
q ) ? q - 2 : 0;
3125 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3126 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
/* Final partial step for the residual exponent difference; then back off
   the quotient while the subtracted term still exceeds the remainder. */
3130 if ( 0 < expDiff ) {
3131 q = estimateDiv128To64( aSig0, aSig1, bSig );
3132 q = ( 2 <
q ) ? q - 2 : 0;
3134 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3135 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3136 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3137 while (
le128( term0, term1, aSig0, aSig1 ) ) {
3139 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
/* Choose between the remainder and its complement (bSig - remainder) so the
   final result is the one nearest zero, per IEEE remainder semantics. */
3146 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3147 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3148 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3151 aSig0 = alternateASig0;
3152 aSig1 = alternateASig1;
/* Normalize, round and pack the 128-bit remainder significand. */
3157 normalizeRoundAndPackFloatx80(
3158 roundData, zSign, bExp + expDiff, aSig0, aSig1 );
/*
-------------------------------------------------------------------------------
Returns the square root of the extended double-precision floating-point
value `a'.  Rounding is controlled by `roundData'.
NOTE(review): this block appears garbled by extraction -- interior lines
(the mul64To128 of zSig0 with itself feeding term0/term1, zSig0/zSig1
decrements inside the correction loops, the sub192 operand line at 3229's
call site, and several closing braces) are missing.  Compare against the
original SoftFloat source before relying on the exact statement sequence.
-------------------------------------------------------------------------------
*/
3169 floatx80 floatx80_sqrt(
struct roundingData *roundData, floatx80 a )
3173 bits64 aSig0, aSig1, zSig0, zSig1;
3174 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3175 bits64 shiftedRem0, shiftedRem1;
/* Unpack the operand. */
3178 aSig0 = extractFloatx80Frac( a );
3179 aExp = extractFloatx80Exp( a );
3180 aSign = extractFloatx80Sign( a );
/* NaN propagates; sqrt(+Inf) = +Inf. */
3181 if ( aExp == 0x7FFF ) {
3182 if ( (
bits64) ( aSig0<<1 ) )
return propagateFloatx80NaN( a, a );
3183 if ( ! aSign )
return a;
/* sqrt(+/-0) = +/-0. */
3187 if ( ( aExp | aSig0 ) == 0 )
return a;
/* sqrt of a negative number: default NaN.  NOTE(review): the invalid
   exception flagging appears to be missing from this extraction. */
3190 z.low = floatx80_default_nan_low;
3191 z.high = floatx80_default_nan_high;
3196 if ( aSig0 == 0 )
return packFloatx80( 0, 0, 0 );
3197 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
/* Result exponent is half the unbiased exponent, rebiased. */
3199 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
/* 32-bit seed for the root, refined by one Newton-like division step;
   clamp on overflow of the estimate. */
3200 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3203 shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
3204 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
3205 if ( 0 <= (
sbits64) zSig0 ) zSig0 =
LIT64( 0xFFFFFFFFFFFFFFFF );
3206 shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
/* Correct zSig0 downward until rem = aSig - zSig0^2 is nonnegative. */
3208 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3209 while ( (
sbits64) rem0 < 0 ) {
3211 shortShift128Left( 0, zSig0, 1, &term0, &term1 );
3213 add128( rem0, rem1, term0, term1, &rem0, &rem1 );
/* Low 64 root bits from the shifted remainder; near a rounding boundary,
   recompute the remainder exactly and set the sticky bit from it. */
3215 shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
3216 zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
3217 if ( (
bits64) ( zSig1<<1 ) <= 10 ) {
3218 if ( zSig1 == 0 ) zSig1 = 1;
3220 shortShift128Left( term1, term2, 1, &term1, &term2 );
3221 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3223 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3224 while ( (
sbits64) rem1 < 0 ) {
3226 shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
3229 rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
3231 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
/* Round and pack; square root is always nonnegative (sign bit 0). */
3234 roundAndPackFloatx80(
3235 roundData, 0, zExp, zSig0, zSig1 );
/*
-------------------------------------------------------------------------------
Returns 1 if the extended double-precision value `a' equals `b'; otherwise 0.
Comparison with a NaN is unequal; only signaling NaNs raise the invalid
exception (see the floatx80_is_signaling_nan test below).
NOTE(review): lines are missing from this extraction (the NaN early return,
the a.low == b.low term of the equality, and the +0 == -0 disjunct).
-------------------------------------------------------------------------------
*/
3247 flag floatx80_eq( floatx80 a, floatx80 b )
/* Either operand a NaN (max exponent, nonzero fraction after dropping the
   integer bit)? */
3250 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3251 && (
bits64) ( extractFloatx80Frac( a )<<1 ) )
3252 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3253 && (
bits64) ( extractFloatx80Frac( b )<<1 ) )
3255 if ( floatx80_is_signaling_nan( a )
3256 || floatx80_is_signaling_nan( b ) ) {
/* Bitwise equality, with (bits16)((a.high|b.high)<<1) == 0 admitting the
   +0 == -0 case (both sig and exponent zero, signs possibly differing). */
3263 && ( ( a.high == b.high )
3265 && ( (
bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
/*
-------------------------------------------------------------------------------
Returns 1 if the extended double-precision value `a' is less than or equal
to `b'; otherwise 0.  Per the function name this is the signaling
comparison: any NaN operand presumably raises invalid -- TODO confirm, the
exception-flag line is missing from this extraction.
-------------------------------------------------------------------------------
*/
3278 flag floatx80_le( floatx80 a, floatx80 b )
/* NaN check on either operand. */
3282 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3283 && (
bits64) ( extractFloatx80Frac( a )<<1 ) )
3284 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3285 && (
bits64) ( extractFloatx80Frac( b )<<1 ) )
3290 aSign = extractFloatx80Sign( a );
3291 bSign = extractFloatx80Sign( b );
/* Opposite signs: a <= b unless a is positive, except both zeros compare
   equal (the (bits16)(...<<1) | low-bits expression tests for +/-0). */
3292 if ( aSign != bSign ) {
3295 || ( ( ( (
bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
/* Same sign: 128-bit magnitude comparison, direction flipped for
   negative values. */
3299 aSign ?
le128( b.high, b.low, a.high, a.low )
/*
-------------------------------------------------------------------------------
Returns 1 if the extended double-precision value `a' is less than `b';
otherwise 0.  Per the function name this is the signaling comparison: any
NaN operand presumably raises invalid -- TODO confirm, the exception-flag
line is missing from this extraction.
-------------------------------------------------------------------------------
*/
3312 flag floatx80_lt( floatx80 a, floatx80 b )
/* NaN check on either operand. */
3316 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3317 && (
bits64) ( extractFloatx80Frac( a )<<1 ) )
3318 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3319 && (
bits64) ( extractFloatx80Frac( b )<<1 ) )
3324 aSign = extractFloatx80Sign( a );
3325 bSign = extractFloatx80Sign( b );
/* Opposite signs: strictly less only when not both zero (the
   (bits16)(...<<1) | low-bits expression tests for +/-0). */
3326 if ( aSign != bSign ) {
3329 && ( ( ( (
bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
/* Same sign: 128-bit magnitude comparison, direction flipped for
   negative values. */
3333 aSign ? lt128( b.high, b.low, a.high, a.low )
/*
-------------------------------------------------------------------------------
Returns 1 if the extended double-precision value `a' equals `b'; otherwise 0.
Per the function name this is the signaling variant of floatx80_eq: any NaN
operand presumably raises invalid -- TODO confirm, the exception-flag and
early-return lines are missing from this extraction.
-------------------------------------------------------------------------------
*/
3346 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
/* NaN check on either operand. */
3349 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3350 && (
bits64) ( extractFloatx80Frac( a )<<1 ) )
3351 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3352 && (
bits64) ( extractFloatx80Frac( b )<<1 ) )
/* Bitwise equality, with (bits16)((a.high|b.high)<<1) == 0 admitting the
   +0 == -0 case. */
3359 && ( ( a.high == b.high )
3361 && ( (
bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
/*
-------------------------------------------------------------------------------
Returns 1 if the extended double-precision value `a' is less than or equal
to `b'; otherwise 0.  Per the function name this is the quiet variant of
floatx80_le: presumably only signaling NaNs raise invalid -- TODO confirm,
the NaN-handling interior lines are missing from this extraction.
-------------------------------------------------------------------------------
*/
3374 flag floatx80_le_quiet( floatx80 a, floatx80 b )
/* NaN check on either operand. */
3378 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3379 && (
bits64) ( extractFloatx80Frac( a )<<1 ) )
3380 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3381 && (
bits64) ( extractFloatx80Frac( b )<<1 ) )
3386 aSign = extractFloatx80Sign( a );
3387 bSign = extractFloatx80Sign( b );
/* Opposite signs: a <= b unless a is positive, except both zeros compare
   equal (the (bits16)(...<<1) | low-bits expression tests for +/-0). */
3388 if ( aSign != bSign ) {
3391 || ( ( ( (
bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
/* Same sign: 128-bit magnitude comparison, direction flipped for
   negative values. */
3395 aSign ?
le128( b.high, b.low, a.high, a.low )
3408 flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3412 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3413 && (
bits64) ( extractFloatx80Frac( a )<<1 ) )
3414 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3415 && (
bits64) ( extractFloatx80Frac( b )<<1 ) )
3420 aSign = extractFloatx80Sign( a );
3421 bSign = extractFloatx80Sign( b );
3422 if ( aSign != bSign ) {
3425 && ( ( ( (
bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3429 aSign ? lt128( b.high, b.low, a.high, a.low )