Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
softfloat.c
Go to the documentation of this file.
1 /*
2  * Floating point emulation support for subnormalised numbers on SH4
3  * architecture This file is derived from the SoftFloat IEC/IEEE
4  * Floating-point Arithmetic Package, Release 2 the original license of
5  * which is reproduced below.
6  *
7  * ========================================================================
8  *
9  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10  * Arithmetic Package, Release 2.
11  *
12  * Written by John R. Hauser. This work was made possible in part by the
13  * International Computer Science Institute, located at Suite 600, 1947 Center
14  * Street, Berkeley, California 94704. Funding was partially provided by the
15  * National Science Foundation under grant MIP-9311980. The original version
16  * of this code was written as part of a project to build a fixed-point vector
17  * processor in collaboration with the University of California at Berkeley,
18  * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20  * arithmetic/softfloat.html'.
21  *
22  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
23  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24  * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
25  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
27  *
28  * Derivative works are acceptable, even for commercial purposes, so long as
29  * (1) they include prominent notice that the work is derivative, and (2) they
30  * include prominent notice akin to these three paragraphs for those parts of
31  * this code that are retained.
32  *
33  * ========================================================================
34  *
35  * SH4 modifications by Ismail Dhaoui <[email protected]>
36  * and Kamel Khelifi <[email protected]>
37  */
38 #include <linux/kernel.h>
39 #include <cpu/fpu.h>
40 #include <asm/div64.h>
41 
42 #define LIT64( a ) a##LL
43 
44 typedef char flag;
45 typedef unsigned char uint8;
46 typedef signed char int8;
47 typedef int uint16;
48 typedef int int16;
49 typedef unsigned int uint32;
50 typedef signed int int32;
51 
52 typedef unsigned long long int bits64;
53 typedef signed long long int sbits64;
54 
55 typedef unsigned char bits8;
56 typedef signed char sbits8;
57 typedef unsigned short int bits16;
58 typedef signed short int sbits16;
59 typedef unsigned int bits32;
60 typedef signed int sbits32;
61 
62 typedef unsigned long long int uint64;
63 typedef signed long long int int64;
64 
65 typedef unsigned long int float32;
66 typedef unsigned long long float64;
67 
68 extern void float_raise(unsigned int flags); /* in fpu.c */
69 extern int float_rounding_mode(void); /* in fpu.c */
70 
77 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
79 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
90 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91  bits64 * z1Ptr);
92 void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
93  bits64 * z1Ptr);
94 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
95 
96 static int8 countLeadingZeros32(bits32 a);
97 static int8 countLeadingZeros64(bits64 a);
98 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
99  bits64 zSig);
100 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
101 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
102 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
103 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
104  bits32 zSig);
105 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
106 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
107 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
108 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
109  bits64 * zSigPtr);
110 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
111 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
112  bits32 * zSigPtr);
113 
115 {
116  return a & LIT64(0x000FFFFFFFFFFFFF);
117 }
118 
120 {
121  return a >> 63;
122 }
123 
125 {
126  return (a >> 52) & 0x7FF;
127 }
128 
130 {
131  return (a >> 23) & 0xFF;
132 }
133 
135 {
136  return a >> 31;
137 }
138 
140 {
141  return a & 0x007FFFFF;
142 }
143 
144 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
145 {
146  return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147 }
148 
150 {
151  bits64 z;
152 
153  if (count == 0) {
154  z = a;
155  } else if (count < 64) {
156  z = (a >> count) | ((a << ((-count) & 63)) != 0);
157  } else {
158  z = (a != 0);
159  }
160  *zPtr = z;
161 }
162 
163 static int8 countLeadingZeros32(bits32 a)
164 {
165  static const int8 countLeadingZerosHigh[] = {
166  8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
167  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
168  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
182  };
183  int8 shiftCount;
184 
185  shiftCount = 0;
186  if (a < 0x10000) {
187  shiftCount += 16;
188  a <<= 16;
189  }
190  if (a < 0x1000000) {
191  shiftCount += 8;
192  a <<= 8;
193  }
194  shiftCount += countLeadingZerosHigh[a >> 24];
195  return shiftCount;
196 
197 }
198 
199 static int8 countLeadingZeros64(bits64 a)
200 {
201  int8 shiftCount;
202 
203  shiftCount = 0;
204  if (a < ((bits64) 1) << 32) {
205  shiftCount += 32;
206  } else {
207  a >>= 32;
208  }
209  shiftCount += countLeadingZeros32(a);
210  return shiftCount;
211 
212 }
213 
214 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
215 {
216  int8 shiftCount;
217 
218  shiftCount = countLeadingZeros64(zSig) - 1;
219  return roundAndPackFloat64(zSign, zExp - shiftCount,
220  zSig << shiftCount);
221 
222 }
223 
224 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
225 {
226  int16 aExp, bExp, zExp;
227  bits64 aSig, bSig, zSig;
228  int16 expDiff;
229 
230  aSig = extractFloat64Frac(a);
231  aExp = extractFloat64Exp(a);
232  bSig = extractFloat64Frac(b);
233  bExp = extractFloat64Exp(b);
234  expDiff = aExp - bExp;
235  aSig <<= 10;
236  bSig <<= 10;
237  if (0 < expDiff)
238  goto aExpBigger;
239  if (expDiff < 0)
240  goto bExpBigger;
241  if (aExp == 0) {
242  aExp = 1;
243  bExp = 1;
244  }
245  if (bSig < aSig)
246  goto aBigger;
247  if (aSig < bSig)
248  goto bBigger;
249  return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
250  bExpBigger:
251  if (bExp == 0x7FF) {
252  return packFloat64(zSign ^ 1, 0x7FF, 0);
253  }
254  if (aExp == 0) {
255  ++expDiff;
256  } else {
257  aSig |= LIT64(0x4000000000000000);
258  }
259  shift64RightJamming(aSig, -expDiff, &aSig);
260  bSig |= LIT64(0x4000000000000000);
261  bBigger:
262  zSig = bSig - aSig;
263  zExp = bExp;
264  zSign ^= 1;
265  goto normalizeRoundAndPack;
266  aExpBigger:
267  if (aExp == 0x7FF) {
268  return a;
269  }
270  if (bExp == 0) {
271  --expDiff;
272  } else {
273  bSig |= LIT64(0x4000000000000000);
274  }
275  shift64RightJamming(bSig, expDiff, &bSig);
276  aSig |= LIT64(0x4000000000000000);
277  aBigger:
278  zSig = aSig - bSig;
279  zExp = aExp;
280  normalizeRoundAndPack:
281  --zExp;
282  return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
283 
284 }
285 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
286 {
287  int16 aExp, bExp, zExp;
288  bits64 aSig, bSig, zSig;
289  int16 expDiff;
290 
291  aSig = extractFloat64Frac(a);
292  aExp = extractFloat64Exp(a);
293  bSig = extractFloat64Frac(b);
294  bExp = extractFloat64Exp(b);
295  expDiff = aExp - bExp;
296  aSig <<= 9;
297  bSig <<= 9;
298  if (0 < expDiff) {
299  if (aExp == 0x7FF) {
300  return a;
301  }
302  if (bExp == 0) {
303  --expDiff;
304  } else {
305  bSig |= LIT64(0x2000000000000000);
306  }
307  shift64RightJamming(bSig, expDiff, &bSig);
308  zExp = aExp;
309  } else if (expDiff < 0) {
310  if (bExp == 0x7FF) {
311  return packFloat64(zSign, 0x7FF, 0);
312  }
313  if (aExp == 0) {
314  ++expDiff;
315  } else {
316  aSig |= LIT64(0x2000000000000000);
317  }
318  shift64RightJamming(aSig, -expDiff, &aSig);
319  zExp = bExp;
320  } else {
321  if (aExp == 0x7FF) {
322  return a;
323  }
324  if (aExp == 0)
325  return packFloat64(zSign, 0, (aSig + bSig) >> 9);
326  zSig = LIT64(0x4000000000000000) + aSig + bSig;
327  zExp = aExp;
328  goto roundAndPack;
329  }
330  aSig |= LIT64(0x2000000000000000);
331  zSig = (aSig + bSig) << 1;
332  --zExp;
333  if ((sbits64) zSig < 0) {
334  zSig = aSig + bSig;
335  ++zExp;
336  }
337  roundAndPack:
338  return roundAndPackFloat64(zSign, zExp, zSig);
339 
340 }
341 
342 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
343 {
344  return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
345 }
346 
348 {
349  bits32 z;
350  if (count == 0) {
351  z = a;
352  } else if (count < 32) {
353  z = (a >> count) | ((a << ((-count) & 31)) != 0);
354  } else {
355  z = (a != 0);
356  }
357  *zPtr = z;
358 }
359 
360 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
361 {
362  flag roundNearestEven;
363  int8 roundIncrement, roundBits;
364  flag isTiny;
365 
366  /* SH4 has only 2 rounding modes - round to nearest and round to zero */
367  roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
368  roundIncrement = 0x40;
369  if (!roundNearestEven) {
370  roundIncrement = 0;
371  }
372  roundBits = zSig & 0x7F;
373  if (0xFD <= (bits16) zExp) {
374  if ((0xFD < zExp)
375  || ((zExp == 0xFD)
376  && ((sbits32) (zSig + roundIncrement) < 0))
377  ) {
379  return packFloat32(zSign, 0xFF,
380  0) - (roundIncrement == 0);
381  }
382  if (zExp < 0) {
383  isTiny = (zExp < -1)
384  || (zSig + roundIncrement < 0x80000000);
385  shift32RightJamming(zSig, -zExp, &zSig);
386  zExp = 0;
387  roundBits = zSig & 0x7F;
388  if (isTiny && roundBits)
390  }
391  }
392  if (roundBits)
394  zSig = (zSig + roundIncrement) >> 7;
395  zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
396  if (zSig == 0)
397  zExp = 0;
398  return packFloat32(zSign, zExp, zSig);
399 
400 }
401 
402 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
403 {
404  int8 shiftCount;
405 
406  shiftCount = countLeadingZeros32(zSig) - 1;
407  return roundAndPackFloat32(zSign, zExp - shiftCount,
408  zSig << shiftCount);
409 }
410 
411 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
412 {
413  flag roundNearestEven;
414  int16 roundIncrement, roundBits;
415  flag isTiny;
416 
417  /* SH4 has only 2 rounding modes - round to nearest and round to zero */
418  roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
419  roundIncrement = 0x200;
420  if (!roundNearestEven) {
421  roundIncrement = 0;
422  }
423  roundBits = zSig & 0x3FF;
424  if (0x7FD <= (bits16) zExp) {
425  if ((0x7FD < zExp)
426  || ((zExp == 0x7FD)
427  && ((sbits64) (zSig + roundIncrement) < 0))
428  ) {
430  return packFloat64(zSign, 0x7FF,
431  0) - (roundIncrement == 0);
432  }
433  if (zExp < 0) {
434  isTiny = (zExp < -1)
435  || (zSig + roundIncrement <
436  LIT64(0x8000000000000000));
437  shift64RightJamming(zSig, -zExp, &zSig);
438  zExp = 0;
439  roundBits = zSig & 0x3FF;
440  if (isTiny && roundBits)
442  }
443  }
444  if (roundBits)
446  zSig = (zSig + roundIncrement) >> 10;
447  zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
448  if (zSig == 0)
449  zExp = 0;
450  return packFloat64(zSign, zExp, zSig);
451 
452 }
453 
454 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
455 {
456  int16 aExp, bExp, zExp;
457  bits32 aSig, bSig, zSig;
458  int16 expDiff;
459 
460  aSig = extractFloat32Frac(a);
461  aExp = extractFloat32Exp(a);
462  bSig = extractFloat32Frac(b);
463  bExp = extractFloat32Exp(b);
464  expDiff = aExp - bExp;
465  aSig <<= 7;
466  bSig <<= 7;
467  if (0 < expDiff)
468  goto aExpBigger;
469  if (expDiff < 0)
470  goto bExpBigger;
471  if (aExp == 0) {
472  aExp = 1;
473  bExp = 1;
474  }
475  if (bSig < aSig)
476  goto aBigger;
477  if (aSig < bSig)
478  goto bBigger;
479  return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
480  bExpBigger:
481  if (bExp == 0xFF) {
482  return packFloat32(zSign ^ 1, 0xFF, 0);
483  }
484  if (aExp == 0) {
485  ++expDiff;
486  } else {
487  aSig |= 0x40000000;
488  }
489  shift32RightJamming(aSig, -expDiff, &aSig);
490  bSig |= 0x40000000;
491  bBigger:
492  zSig = bSig - aSig;
493  zExp = bExp;
494  zSign ^= 1;
495  goto normalizeRoundAndPack;
496  aExpBigger:
497  if (aExp == 0xFF) {
498  return a;
499  }
500  if (bExp == 0) {
501  --expDiff;
502  } else {
503  bSig |= 0x40000000;
504  }
505  shift32RightJamming(bSig, expDiff, &bSig);
506  aSig |= 0x40000000;
507  aBigger:
508  zSig = aSig - bSig;
509  zExp = aExp;
510  normalizeRoundAndPack:
511  --zExp;
512  return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
513 
514 }
515 
516 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
517 {
518  int16 aExp, bExp, zExp;
519  bits32 aSig, bSig, zSig;
520  int16 expDiff;
521 
522  aSig = extractFloat32Frac(a);
523  aExp = extractFloat32Exp(a);
524  bSig = extractFloat32Frac(b);
525  bExp = extractFloat32Exp(b);
526  expDiff = aExp - bExp;
527  aSig <<= 6;
528  bSig <<= 6;
529  if (0 < expDiff) {
530  if (aExp == 0xFF) {
531  return a;
532  }
533  if (bExp == 0) {
534  --expDiff;
535  } else {
536  bSig |= 0x20000000;
537  }
538  shift32RightJamming(bSig, expDiff, &bSig);
539  zExp = aExp;
540  } else if (expDiff < 0) {
541  if (bExp == 0xFF) {
542  return packFloat32(zSign, 0xFF, 0);
543  }
544  if (aExp == 0) {
545  ++expDiff;
546  } else {
547  aSig |= 0x20000000;
548  }
549  shift32RightJamming(aSig, -expDiff, &aSig);
550  zExp = bExp;
551  } else {
552  if (aExp == 0xFF) {
553  return a;
554  }
555  if (aExp == 0)
556  return packFloat32(zSign, 0, (aSig + bSig) >> 6);
557  zSig = 0x40000000 + aSig + bSig;
558  zExp = aExp;
559  goto roundAndPack;
560  }
561  aSig |= 0x20000000;
562  zSig = (aSig + bSig) << 1;
563  --zExp;
564  if ((sbits32) zSig < 0) {
565  zSig = aSig + bSig;
566  ++zExp;
567  }
568  roundAndPack:
569  return roundAndPackFloat32(zSign, zExp, zSig);
570 
571 }
572 
574 {
575  flag aSign, bSign;
576 
577  aSign = extractFloat64Sign(a);
578  bSign = extractFloat64Sign(b);
579  if (aSign == bSign) {
580  return subFloat64Sigs(a, b, aSign);
581  } else {
582  return addFloat64Sigs(a, b, aSign);
583  }
584 
585 }
586 
588 {
589  flag aSign, bSign;
590 
591  aSign = extractFloat32Sign(a);
592  bSign = extractFloat32Sign(b);
593  if (aSign == bSign) {
594  return subFloat32Sigs(a, b, aSign);
595  } else {
596  return addFloat32Sigs(a, b, aSign);
597  }
598 
599 }
600 
602 {
603  flag aSign, bSign;
604 
605  aSign = extractFloat32Sign(a);
606  bSign = extractFloat32Sign(b);
607  if (aSign == bSign) {
608  return addFloat32Sigs(a, b, aSign);
609  } else {
610  return subFloat32Sigs(a, b, aSign);
611  }
612 
613 }
614 
616 {
617  flag aSign, bSign;
618 
619  aSign = extractFloat64Sign(a);
620  bSign = extractFloat64Sign(b);
621  if (aSign == bSign) {
622  return addFloat64Sigs(a, b, aSign);
623  } else {
624  return subFloat64Sigs(a, b, aSign);
625  }
626 }
627 
628 static void
629 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
630 {
631  int8 shiftCount;
632 
633  shiftCount = countLeadingZeros64(aSig) - 11;
634  *zSigPtr = aSig << shiftCount;
635  *zExpPtr = 1 - shiftCount;
636 }
637 
638 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
639  bits64 * z1Ptr)
640 {
641  bits64 z1;
642 
643  z1 = a1 + b1;
644  *z1Ptr = z1;
645  *z0Ptr = a0 + b0 + (z1 < a1);
646 }
647 
648 void
650  bits64 * z1Ptr)
651 {
652  *z1Ptr = a1 - b1;
653  *z0Ptr = a0 - b0 - (a1 < b1);
654 }
655 
656 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
657 {
658  bits64 b0, b1;
659  bits64 rem0, rem1, term0, term1;
660  bits64 z, tmp;
661  if (b <= a0)
662  return LIT64(0xFFFFFFFFFFFFFFFF);
663  b0 = b >> 32;
664  tmp = a0;
665  do_div(tmp, b0);
666 
667  z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
668  mul64To128(b, z, &term0, &term1);
669  sub128(a0, a1, term0, term1, &rem0, &rem1);
670  while (((sbits64) rem0) < 0) {
671  z -= LIT64(0x100000000);
672  b1 = b << 32;
673  add128(rem0, rem1, b0, b1, &rem0, &rem1);
674  }
675  rem0 = (rem0 << 32) | (rem1 >> 32);
676  tmp = rem0;
677  do_div(tmp, b0);
678  z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
679  return z;
680 }
681 
682 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
683 {
684  bits32 aHigh, aLow, bHigh, bLow;
685  bits64 z0, zMiddleA, zMiddleB, z1;
686 
687  aLow = a;
688  aHigh = a >> 32;
689  bLow = b;
690  bHigh = b >> 32;
691  z1 = ((bits64) aLow) * bLow;
692  zMiddleA = ((bits64) aLow) * bHigh;
693  zMiddleB = ((bits64) aHigh) * bLow;
694  z0 = ((bits64) aHigh) * bHigh;
695  zMiddleA += zMiddleB;
696  z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
697  zMiddleA <<= 32;
698  z1 += zMiddleA;
699  z0 += (z1 < zMiddleA);
700  *z1Ptr = z1;
701  *z0Ptr = z0;
702 
703 }
704 
705 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
706  bits32 * zSigPtr)
707 {
708  int8 shiftCount;
709 
710  shiftCount = countLeadingZeros32(aSig) - 8;
711  *zSigPtr = aSig << shiftCount;
712  *zExpPtr = 1 - shiftCount;
713 
714 }
715 
717 {
718  flag aSign, bSign, zSign;
719  int16 aExp, bExp, zExp;
720  bits64 aSig, bSig, zSig;
721  bits64 rem0, rem1;
722  bits64 term0, term1;
723 
724  aSig = extractFloat64Frac(a);
725  aExp = extractFloat64Exp(a);
726  aSign = extractFloat64Sign(a);
727  bSig = extractFloat64Frac(b);
728  bExp = extractFloat64Exp(b);
729  bSign = extractFloat64Sign(b);
730  zSign = aSign ^ bSign;
731  if (aExp == 0x7FF) {
732  if (bExp == 0x7FF) {
733  }
734  return packFloat64(zSign, 0x7FF, 0);
735  }
736  if (bExp == 0x7FF) {
737  return packFloat64(zSign, 0, 0);
738  }
739  if (bExp == 0) {
740  if (bSig == 0) {
741  if ((aExp | aSig) == 0) {
743  }
744  return packFloat64(zSign, 0x7FF, 0);
745  }
746  normalizeFloat64Subnormal(bSig, &bExp, &bSig);
747  }
748  if (aExp == 0) {
749  if (aSig == 0)
750  return packFloat64(zSign, 0, 0);
751  normalizeFloat64Subnormal(aSig, &aExp, &aSig);
752  }
753  zExp = aExp - bExp + 0x3FD;
754  aSig = (aSig | LIT64(0x0010000000000000)) << 10;
755  bSig = (bSig | LIT64(0x0010000000000000)) << 11;
756  if (bSig <= (aSig + aSig)) {
757  aSig >>= 1;
758  ++zExp;
759  }
760  zSig = estimateDiv128To64(aSig, 0, bSig);
761  if ((zSig & 0x1FF) <= 2) {
762  mul64To128(bSig, zSig, &term0, &term1);
763  sub128(aSig, 0, term0, term1, &rem0, &rem1);
764  while ((sbits64) rem0 < 0) {
765  --zSig;
766  add128(rem0, rem1, 0, bSig, &rem0, &rem1);
767  }
768  zSig |= (rem1 != 0);
769  }
770  return roundAndPackFloat64(zSign, zExp, zSig);
771 
772 }
773 
775 {
776  flag aSign, bSign, zSign;
777  int16 aExp, bExp, zExp;
778  bits32 aSig, bSig;
779  uint64_t zSig;
780 
781  aSig = extractFloat32Frac(a);
782  aExp = extractFloat32Exp(a);
783  aSign = extractFloat32Sign(a);
784  bSig = extractFloat32Frac(b);
785  bExp = extractFloat32Exp(b);
786  bSign = extractFloat32Sign(b);
787  zSign = aSign ^ bSign;
788  if (aExp == 0xFF) {
789  if (bExp == 0xFF) {
790  }
791  return packFloat32(zSign, 0xFF, 0);
792  }
793  if (bExp == 0xFF) {
794  return packFloat32(zSign, 0, 0);
795  }
796  if (bExp == 0) {
797  if (bSig == 0) {
798  return packFloat32(zSign, 0xFF, 0);
799  }
800  normalizeFloat32Subnormal(bSig, &bExp, &bSig);
801  }
802  if (aExp == 0) {
803  if (aSig == 0)
804  return packFloat32(zSign, 0, 0);
805  normalizeFloat32Subnormal(aSig, &aExp, &aSig);
806  }
807  zExp = aExp - bExp + 0x7D;
808  aSig = (aSig | 0x00800000) << 7;
809  bSig = (bSig | 0x00800000) << 8;
810  if (bSig <= (aSig + aSig)) {
811  aSig >>= 1;
812  ++zExp;
813  }
814  zSig = (((bits64) aSig) << 32);
815  do_div(zSig, bSig);
816 
817  if ((zSig & 0x3F) == 0) {
818  zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
819  }
820  return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
821 
822 }
823 
825 {
826  char aSign, bSign, zSign;
827  int aExp, bExp, zExp;
828  unsigned int aSig, bSig;
829  unsigned long long zSig64;
830  unsigned int zSig;
831 
832  aSig = extractFloat32Frac(a);
833  aExp = extractFloat32Exp(a);
834  aSign = extractFloat32Sign(a);
835  bSig = extractFloat32Frac(b);
836  bExp = extractFloat32Exp(b);
837  bSign = extractFloat32Sign(b);
838  zSign = aSign ^ bSign;
839  if (aExp == 0) {
840  if (aSig == 0)
841  return packFloat32(zSign, 0, 0);
842  normalizeFloat32Subnormal(aSig, &aExp, &aSig);
843  }
844  if (bExp == 0) {
845  if (bSig == 0)
846  return packFloat32(zSign, 0, 0);
847  normalizeFloat32Subnormal(bSig, &bExp, &bSig);
848  }
849  if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
850  return roundAndPackFloat32(zSign, 0xff, 0);
851 
852  zExp = aExp + bExp - 0x7F;
853  aSig = (aSig | 0x00800000) << 7;
854  bSig = (bSig | 0x00800000) << 8;
855  shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
856  zSig = zSig64;
857  if (0 <= (signed int)(zSig << 1)) {
858  zSig <<= 1;
859  --zExp;
860  }
861  return roundAndPackFloat32(zSign, zExp, zSig);
862 
863 }
864 
866 {
867  char aSign, bSign, zSign;
868  int aExp, bExp, zExp;
869  unsigned long long int aSig, bSig, zSig0, zSig1;
870 
871  aSig = extractFloat64Frac(a);
872  aExp = extractFloat64Exp(a);
873  aSign = extractFloat64Sign(a);
874  bSig = extractFloat64Frac(b);
875  bExp = extractFloat64Exp(b);
876  bSign = extractFloat64Sign(b);
877  zSign = aSign ^ bSign;
878 
879  if (aExp == 0) {
880  if (aSig == 0)
881  return packFloat64(zSign, 0, 0);
882  normalizeFloat64Subnormal(aSig, &aExp, &aSig);
883  }
884  if (bExp == 0) {
885  if (bSig == 0)
886  return packFloat64(zSign, 0, 0);
887  normalizeFloat64Subnormal(bSig, &bExp, &bSig);
888  }
889  if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
890  return roundAndPackFloat64(zSign, 0x7ff, 0);
891 
892  zExp = aExp + bExp - 0x3FF;
893  aSig = (aSig | 0x0010000000000000LL) << 10;
894  bSig = (bSig | 0x0010000000000000LL) << 11;
895  mul64To128(aSig, bSig, &zSig0, &zSig1);
896  zSig0 |= (zSig1 != 0);
897  if (0 <= (signed long long int)(zSig0 << 1)) {
898  zSig0 <<= 1;
899  --zExp;
900  }
901  return roundAndPackFloat64(zSign, zExp, zSig0);
902 }
903 
904 /*
905  * -------------------------------------------------------------------------------
906  * Returns the result of converting the double-precision floating-point value
907  * `a' to the single-precision floating-point format. The conversion is
908  * performed according to the IEC/IEEE Standard for Binary Floating-point
909  * Arithmetic.
910  * -------------------------------------------------------------------------------
911  * */
913 {
914  flag aSign;
915  int16 aExp;
916  bits64 aSig;
917  bits32 zSig;
918 
919  aSig = extractFloat64Frac( a );
920  aExp = extractFloat64Exp( a );
921  aSign = extractFloat64Sign( a );
922 
923  shift64RightJamming( aSig, 22, &aSig );
924  zSig = aSig;
925  if ( aExp || zSig ) {
926  zSig |= 0x40000000;
927  aExp -= 0x381;
928  }
929  return roundAndPackFloat32(aSign, aExp, zSig);
930 }