Linux Kernel  3.7.1
fmpyfadd.c
1 /*
2  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3  *
4  * Floating-point emulation code
5  * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <[email protected]>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2, or (at your option)
10  * any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  */
21 /*
22  * BEGIN_DESC
23  *
24  * File:
25  * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
26  *
27  * Purpose:
28  * Double Floating-point Multiply Fused Add
29  * Double Floating-point Multiply Negate Fused Add
30  * Single Floating-point Multiply Fused Add
31  * Single Floating-point Multiply Negate Fused Add
32  *
33  * External Interfaces:
34  * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35  * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36  * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37  * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
38  *
39  * Internal Interfaces:
40  *
41  * Theory:
42  * Each routine forms the product src1*src2 exactly in an extended format, adds src3 (the fmpynfadd variants negate the product first), and rounds the sum once to the destination precision.
43  *
44  * END_DESC
45 */
46 
47 
48 #include "float.h"
49 #include "sgl_float.h"
50 #include "dbl_float.h"
51 
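/*
 * Typical use (an illustrative sketch only; the real caller is the
 * PA-RISC floating-point emulation dispatcher, and the names a, b, c,
 * d and fpstatus below are invented for the example):
 *
 *	dbl_floating_point a, b, c, d;
 *	unsigned int fpstatus;
 *	int ret = dbl_fmpyfadd(&a, &b, &c, &fpstatus, &d);
 *
 * computes d = a*b + c with a single rounding and returns NOEXCEPTION
 * or an OPC_2E_* exception code; the fmpynfadd entry points compute
 * d = -(a*b) + c instead.
 */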
52 
53 /*
54  * Double Floating-point Multiply Fused Add
55  */
56 
57 int
58 dbl_fmpyfadd(
59  dbl_floating_point *src1ptr,
60  dbl_floating_point *src2ptr,
61  dbl_floating_point *src3ptr,
62  unsigned int *status,
63  dbl_floating_point *dstptr)
64 {
65  unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66  register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67  unsigned int rightp1, rightp2, rightp3, rightp4;
68  unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69  register int mpy_exponent, add_exponent, count;
70  boolean inexact = FALSE, is_tiny = FALSE;
71 
72  unsigned int signlessleft1, signlessright1, save;
73  register int result_exponent, diff_exponent;
74  int sign_save, jumpsize;
75 
76  Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77  Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78  Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
79 
80  /*
81  * set sign bit of result of multiply
82  */
83  if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
84  Dbl_setnegativezerop1(resultp1);
85  else Dbl_setzerop1(resultp1);
86 
87  /*
88  * Generate multiply exponent
89  */
90  mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
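 /*
 * (Both stored exponents carry the bias, so one DBL_BIAS is subtracted
 * to leave the product exponent biased exactly once:
 * (e1+bias) + (e2+bias) - bias = (e1+e2) + bias.)
 */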
91 
92  /*
93  * check first operand for NaN's or infinity
94  */
95  if (Dbl_isinfinity_exponent(opnd1p1)) {
96  if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97  if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98  Dbl_isnotnan(opnd3p1,opnd3p2)) {
99  if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
100  /*
101  * invalid since operands are infinity
102  * and zero
103  */
104  if (Is_invalidtrap_enabled())
105  return(OPC_2E_INVALIDEXCEPTION);
106  Set_invalidflag();
107  Dbl_makequietnan(resultp1,resultp2);
108  Dbl_copytoptr(resultp1,resultp2,dstptr);
109  return(NOEXCEPTION);
110  }
111  /*
112  * Check third operand for infinity with a
113  * sign opposite of the multiply result
114  */
115  if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116  (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
117  /*
118  * invalid since attempting a magnitude
119  * subtraction of infinities
120  */
121  if (Is_invalidtrap_enabled())
122  return(OPC_2E_INVALIDEXCEPTION);
123  Set_invalidflag();
124  Dbl_makequietnan(resultp1,resultp2);
125  Dbl_copytoptr(resultp1,resultp2,dstptr);
126  return(NOEXCEPTION);
127  }
128 
129  /*
130  * return infinity
131  */
132  Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133  Dbl_copytoptr(resultp1,resultp2,dstptr);
134  return(NOEXCEPTION);
135  }
136  }
137  else {
138  /*
139  * is NaN; signaling or quiet?
140  */
141  if (Dbl_isone_signaling(opnd1p1)) {
142  /* trap if INVALIDTRAP enabled */
143  if (Is_invalidtrap_enabled())
144  return(OPC_2E_INVALIDEXCEPTION);
145  /* make NaN quiet */
146  Set_invalidflag();
147  Dbl_set_quiet(opnd1p1);
148  }
149  /*
150  * is second operand a signaling NaN?
151  */
152  else if (Dbl_is_signalingnan(opnd2p1)) {
153  /* trap if INVALIDTRAP enabled */
154  if (Is_invalidtrap_enabled())
155  return(OPC_2E_INVALIDEXCEPTION);
156  /* make NaN quiet */
157  Set_invalidflag();
158  Dbl_set_quiet(opnd2p1);
159  Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160  return(NOEXCEPTION);
161  }
162  /*
163  * is third operand a signaling NaN?
164  */
165  else if (Dbl_is_signalingnan(opnd3p1)) {
166  /* trap if INVALIDTRAP enabled */
167  if (Is_invalidtrap_enabled())
168  return(OPC_2E_INVALIDEXCEPTION);
169  /* make NaN quiet */
170  Set_invalidflag();
171  Dbl_set_quiet(opnd3p1);
172  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173  return(NOEXCEPTION);
174  }
175  /*
176  * return quiet NaN
177  */
178  Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179  return(NOEXCEPTION);
180  }
181  }
182 
183  /*
184  * check second operand for NaN's or infinity
185  */
186  if (Dbl_isinfinity_exponent(opnd2p1)) {
187  if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188  if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189  if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
190  /*
191  * invalid since multiply operands are
192  * zero & infinity
193  */
194  if (Is_invalidtrap_enabled())
195  return(OPC_2E_INVALIDEXCEPTION);
196  Set_invalidflag();
197  Dbl_makequietnan(opnd2p1,opnd2p2);
198  Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199  return(NOEXCEPTION);
200  }
201 
202  /*
203  * Check third operand for infinity with a
204  * sign opposite of the multiply result
205  */
206  if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207  (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
208  /*
209  * invalid since attempting a magnitude
210  * subtraction of infinities
211  */
212  if (Is_invalidtrap_enabled())
213  return(OPC_2E_INVALIDEXCEPTION);
214  Set_invalidflag();
215  Dbl_makequietnan(resultp1,resultp2);
216  Dbl_copytoptr(resultp1,resultp2,dstptr);
217  return(NOEXCEPTION);
218  }
219 
220  /*
221  * return infinity
222  */
223  Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224  Dbl_copytoptr(resultp1,resultp2,dstptr);
225  return(NOEXCEPTION);
226  }
227  }
228  else {
229  /*
230  * is NaN; signaling or quiet?
231  */
232  if (Dbl_isone_signaling(opnd2p1)) {
233  /* trap if INVALIDTRAP enabled */
234  if (Is_invalidtrap_enabled())
235  return(OPC_2E_INVALIDEXCEPTION);
236  /* make NaN quiet */
237  Set_invalidflag();
238  Dbl_set_quiet(opnd2p1);
239  }
240  /*
241  * is third operand a signaling NaN?
242  */
243  else if (Dbl_is_signalingnan(opnd3p1)) {
244  /* trap if INVALIDTRAP enabled */
245  if (Is_invalidtrap_enabled())
246  return(OPC_2E_INVALIDEXCEPTION);
247  /* make NaN quiet */
248  Set_invalidflag();
249  Dbl_set_quiet(opnd3p1);
250  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251  return(NOEXCEPTION);
252  }
253  /*
254  * return quiet NaN
255  */
256  Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257  return(NOEXCEPTION);
258  }
259  }
260 
261  /*
262  * check third operand for NaN's or infinity
263  */
264  if (Dbl_isinfinity_exponent(opnd3p1)) {
265  if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266  /* return infinity */
267  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268  return(NOEXCEPTION);
269  } else {
270  /*
271  * is NaN; signaling or quiet?
272  */
273  if (Dbl_isone_signaling(opnd3p1)) {
274  /* trap if INVALIDTRAP enabled */
275  if (Is_invalidtrap_enabled())
276  return(OPC_2E_INVALIDEXCEPTION);
277  /* make NaN quiet */
278  Set_invalidflag();
279  Dbl_set_quiet(opnd3p1);
280  }
281  /*
282  * return quiet NaN
283  */
284  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285  return(NOEXCEPTION);
286  }
287  }
288 
289  /*
290  * Generate multiply mantissa
291  */
292  if (Dbl_isnotzero_exponent(opnd1p1)) {
293  /* set hidden bit */
294  Dbl_clear_signexponent_set_hidden(opnd1p1);
295  }
296  else {
297  /* check for zero */
298  if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
299  /*
300  * Perform the add opnd3 with zero here.
301  */
302  if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303  if (Is_rounding_mode(ROUNDMINUS)) {
304  Dbl_or_signs(opnd3p1,resultp1);
305  } else {
306  Dbl_and_signs(opnd3p1,resultp1);
307  }
308  }
309  /*
310  * Now let's check for trapped underflow case.
311  */
312  else if (Dbl_iszero_exponent(opnd3p1) &&
313  Is_underflowtrap_enabled()) {
314  /* need to normalize results mantissa */
315  sign_save = Dbl_signextendedsign(opnd3p1);
316  result_exponent = 0;
317  Dbl_leftshiftby1(opnd3p1,opnd3p2);
318  Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319  Dbl_set_sign(opnd3p1,/*using*/sign_save);
320  Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321  unfl);
322  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323  /* inexact = FALSE */
324  return(OPC_2E_UNDERFLOWEXCEPTION);
325  }
326  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327  return(NOEXCEPTION);
328  }
329  /* is denormalized, adjust exponent */
330  Dbl_clear_signexponent(opnd1p1);
331  Dbl_leftshiftby1(opnd1p1,opnd1p2);
332  Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
333  }
334  /* opnd2 needs to have hidden bit set with msb in hidden bit */
335  if (Dbl_isnotzero_exponent(opnd2p1)) {
336  Dbl_clear_signexponent_set_hidden(opnd2p1);
337  }
338  else {
339  /* check for zero */
340  if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
341  /*
342  * Perform the add opnd3 with zero here.
343  */
344  if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345  if (Is_rounding_mode(ROUNDMINUS)) {
346  Dbl_or_signs(opnd3p1,resultp1);
347  } else {
348  Dbl_and_signs(opnd3p1,resultp1);
349  }
350  }
351  /*
352  * Now let's check for trapped underflow case.
353  */
354  else if (Dbl_iszero_exponent(opnd3p1) &&
355  Is_underflowtrap_enabled()) {
356  /* need to normalize results mantissa */
357  sign_save = Dbl_signextendedsign(opnd3p1);
358  result_exponent = 0;
359  Dbl_leftshiftby1(opnd3p1,opnd3p2);
360  Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361  Dbl_set_sign(opnd3p1,/*using*/sign_save);
362  Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363  unfl);
364  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365  /* inexact = FALSE */
366  return(OPC_2E_UNDERFLOWEXCEPTION);
367  }
368  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369  return(NOEXCEPTION);
370  }
371  /* is denormalized; want to normalize */
372  Dbl_clear_signexponent(opnd2p1);
373  Dbl_leftshiftby1(opnd2p1,opnd2p2);
374  Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
375  }
376 
377  /* Multiply the first two source mantissas together */
378 
379  /*
380  * The intermediate result will be kept in tmpres,
381  * which needs enough room for 106 bits of mantissa,
382  * so lets call it a Double extended.
383  */
384  Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
385 
386  /*
387  * Four bits at a time are inspected in each loop, and a
388  * simple shift and add multiply algorithm is used.
389  */
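 /*
 * (Each pass consumes one radix-16 digit of opnd1's mantissa, taken
 * from bits 28..31 of its low word: the accumulator is first shifted
 * right by 4, then opnd2 is added in shifted left by 3, 2, 1 or 0
 * places for each digit bit that is set, and opnd1 is shifted right
 * by 4 to expose the next digit. After the loop tmpres holds the
 * exact product of the two 53-bit mantissas.)
 */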
390  for (count = DBL_P-1; count >= 0; count -= 4) {
391  Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392  if (Dbit28p2(opnd1p2)) {
393  /* Fourword_add should be an ADD followed by 3 ADDC's */
394  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
395  opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
396  }
397  if (Dbit29p2(opnd1p2)) {
398  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399  opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
400  }
401  if (Dbit30p2(opnd1p2)) {
402  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403  opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
404  }
405  if (Dbit31p2(opnd1p2)) {
406  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407  opnd2p1, opnd2p2, 0, 0);
408  }
409  Dbl_rightshiftby4(opnd1p1,opnd1p2);
410  }
411  if (Is_dexthiddenoverflow(tmpresp1)) {
412  /* result mantissa >= 2 (mantissa overflow) */
413  mpy_exponent++;
414  Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
415  }
416 
417  /*
418  * Restore the sign of the mpy result which was saved in resultp1.
419  * The exponent will continue to be kept in mpy_exponent.
420  */
421  Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
422 
423  /*
424  * No rounding is required, since the result of the multiply
425  * is exact in the extended format.
426  */
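 /*
 * (This is what makes the operation fused: the one and only rounding
 * of the whole multiply-add happens at the "round:" label below, after
 * the exact product and the addend have been combined.)
 */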
427 
428  /*
429  * Now we are ready to perform the add portion of the operation.
430  *
431  * The exponents need to be kept as integers for now, since the
432  * multiply result might not fit into the exponent field. We
433  * can't overflow or underflow because of this yet, since the
434  * add could bring the final result back into range.
435  */
436  add_exponent = Dbl_exponent(opnd3p1);
437 
438  /*
439  * Check for denormalized or zero add operand.
440  */
441  if (add_exponent == 0) {
442  /* check for zero */
443  if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444  /* right is zero */
445  /* Left can't be zero and must be result.
446  *
447  * The final result is now in tmpres and mpy_exponent,
448  * and needs to be rounded and squeezed back into
449  * double precision format from double extended.
450  */
451  result_exponent = mpy_exponent;
452  Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453  resultp1,resultp2,resultp3,resultp4);
454  sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455  goto round;
456  }
457 
458  /*
459  * Neither are zeroes.
460  * Adjust exponent and normalize add operand.
461  */
462  sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
463  Dbl_clear_signexponent(opnd3p1);
464  Dbl_leftshiftby1(opnd3p1,opnd3p2);
465  Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466  Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
467  } else {
468  Dbl_clear_exponent_set_hidden(opnd3p1);
469  }
470  /*
471  * Copy opnd3 to the double extended variable called right.
472  */
473  Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
474 
475  /*
476  * A zero "save" helps discover equal operands (for later),
477  * and is used in swapping operands (if needed).
478  */
479  Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
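 /*
 * ("save" is the XOR of the two operands' first words: its sign bit is
 * set exactly when the operand signs differ, which is the test
 * "(int)save < 0" used below to pick the subtract path, and the same
 * value is reused to swap the first words without a temporary when the
 * operands must be exchanged.)
 */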
480 
481  /*
482  * Compare magnitude of operands.
483  */
484  Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485  Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486  if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487  Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
488  /*
489  * Set the left operand to the larger one by XOR swap.
490  * First finish the first word "save".
491  */
492  Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493  Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494  Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495  rightp2,rightp3,rightp4);
496  /* also setup exponents used in rest of routine */
497  diff_exponent = add_exponent - mpy_exponent;
498  result_exponent = add_exponent;
499  } else {
500  /* also setup exponents used in rest of routine */
501  diff_exponent = mpy_exponent - add_exponent;
502  result_exponent = mpy_exponent;
503  }
504  /* Invariant: left is not smaller than right. */
505 
506  /*
507  * Special case alignment of operands that would force alignment
508  * beyond the extent of the extension. A further optimization
509  * could special case this but only reduces the path length for
510  * this infrequent case.
511  */
512  if (diff_exponent > DBLEXT_THRESHOLD) {
513  diff_exponent = DBLEXT_THRESHOLD;
514  }
515 
516  /* Align right operand by shifting it to the right */
517  Dblext_clear_sign(rightp1);
518  Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519  /*shifted by*/diff_exponent);
520 
521  /* Treat sum and difference of the operands separately. */
522  if ((int)save < 0) {
523  /*
524  * Difference of the two operands. Overflow can occur if the
525  * multiply overflowed. A borrow can occur out of the hidden
526  * bit and force a post normalization phase.
527  */
528  Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529  rightp1,rightp2,rightp3,rightp4,
530  resultp1,resultp2,resultp3,resultp4);
531  sign_save = Dbl_signextendedsign(resultp1);
532  if (Dbl_iszero_hidden(resultp1)) {
533  /* Handle normalization */
534  /* A straightforward algorithm would now shift the
535  * result and extension left until the hidden bit
536  * becomes one. Not all of the extension bits need
537  * participate in the shift. Only the two most
538  * significant bits (round and guard) are needed.
539  * If only a single shift is needed then the guard
540  * bit becomes a significant low order bit and the
541  * extension must participate in the rounding.
542  * If more than a single shift is needed, then all
543  * bits to the right of the guard bit are zeros,
544  * and the guard bit may or may not be zero. */
545  Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546  resultp4);
547 
548  /* Need to check for a zero result. The sign and
549  * exponent fields have already been zeroed. The more
550  * efficient test of the full object can be used.
551  */
552  if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553  /* Must have been "x-x" or "x+(-x)". */
554  if (Is_rounding_mode(ROUNDMINUS))
555  Dbl_setone_sign(resultp1);
556  Dbl_copytoptr(resultp1,resultp2,dstptr);
557  return(NOEXCEPTION);
558  }
559  result_exponent--;
560 
561  /* Look to see if normalization is finished. */
562  if (Dbl_isone_hidden(resultp1)) {
563  /* No further normalization is needed */
564  goto round;
565  }
566 
567  /* Discover first one bit to determine shift amount.
568  * Use a modified binary search. We have already
569  * shifted the result one position right and still
570  * not found a one so the remainder of the extension
571  * must be zero and simplifies rounding. */
572  /* Scan bytes */
573  while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574  Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575  result_exponent -= 8;
576  }
577  /* Now narrow it down to the nibble */
578  if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579  /* The lower nibble contains the
580  * normalizing one */
581  Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582  result_exponent -= 4;
583  }
584  /* Select case where first bit is set (already
585  * normalized) otherwise select the proper shift. */
586  jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587  if (jumpsize <= 7) switch(jumpsize) {
588  case 1:
589  Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590  resultp4);
591  result_exponent -= 3;
592  break;
593  case 2:
594  case 3:
595  Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596  resultp4);
597  result_exponent -= 2;
598  break;
599  case 4:
600  case 5:
601  case 6:
602  case 7:
603  Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604  resultp4);
605  result_exponent -= 1;
606  break;
607  }
608  } /* end if (hidden...)... */
609  /* Fall through and round */
610  } /* end if (save < 0)... */
611  else {
612  /* Add magnitudes */
613  Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614  rightp1,rightp2,rightp3,rightp4,
615  /*to*/resultp1,resultp2,resultp3,resultp4);
616  sign_save = Dbl_signextendedsign(resultp1);
617  if (Dbl_isone_hiddenoverflow(resultp1)) {
618  /* Prenormalization required. */
619  Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620  resultp4);
621  result_exponent++;
622  } /* end if hiddenoverflow... */
623  } /* end else ...add magnitudes... */
624 
625  /* Round the result. If the extension and lower two words are
626  * all zeros, then the result is exact. Otherwise round in the
627  * correct direction. Underflow is possible. If a postnormalization
628  * is necessary, then the mantissa is all zeros so no shift is needed.
629  */
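 /*
 * (The double result lives in resultp1-resultp2; the high bit of
 * resultp3 is the round bit, the remaining bits of resultp3 together
 * with resultp4 are the sticky bits, and the low bit of resultp2 is
 * the result lsb consulted for the nearest-even tie case.)
 */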
630  round:
631  if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632  Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633  result_exponent,is_tiny);
634  }
635  Dbl_set_sign(resultp1,/*using*/sign_save);
636  if (Dblext_isnotzero_mantissap3(resultp3) ||
637  Dblext_isnotzero_mantissap4(resultp4)) {
638  inexact = TRUE;
639  switch(Rounding_mode()) {
640  case ROUNDNEAREST: /* The default. */
641  if (Dblext_isone_highp3(resultp3)) {
642  /* at least 1/2 ulp */
643  if (Dblext_isnotzero_low31p3(resultp3) ||
644  Dblext_isnotzero_mantissap4(resultp4) ||
645  Dblext_isone_lowp2(resultp2)) {
646  /* either exactly half way and odd or
647  * more than 1/2ulp */
648  Dbl_increment(resultp1,resultp2);
649  }
650  }
651  break;
652 
653  case ROUNDPLUS:
654  if (Dbl_iszero_sign(resultp1)) {
655  /* Round up positive results */
656  Dbl_increment(resultp1,resultp2);
657  }
658  break;
659 
660  case ROUNDMINUS:
661  if (Dbl_isone_sign(resultp1)) {
662  /* Round down negative results */
663  Dbl_increment(resultp1,resultp2);
664  }
665 
666  case ROUNDZERO:;
667  /* truncate is simple */
668  } /* end switch... */
669  if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
670  }
671  if (result_exponent >= DBL_INFINITY_EXPONENT) {
672  /* trap if OVERFLOWTRAP enabled */
673  if (Is_overflowtrap_enabled()) {
674  /*
675  * Adjust bias of result
676  */
677  Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678  Dbl_copytoptr(resultp1,resultp2,dstptr);
679  if (inexact)
680  if (Is_inexacttrap_enabled())
681  return (OPC_2E_OVERFLOWEXCEPTION |
682  OPC_2E_INEXACTEXCEPTION);
683  else Set_inexactflag();
684  return (OPC_2E_OVERFLOWEXCEPTION);
685  }
686  inexact = TRUE;
687  Set_overflowflag();
688  /* set result to infinity or largest number */
689  Dbl_setoverflow(resultp1,resultp2);
690 
691  } else if (result_exponent <= 0) { /* underflow case */
692  if (Is_underflowtrap_enabled()) {
693  /*
694  * Adjust bias of result
695  */
696  Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697  Dbl_copytoptr(resultp1,resultp2,dstptr);
698  if (inexact)
699  if (Is_inexacttrap_enabled())
700  return (OPC_2E_UNDERFLOWEXCEPTION |
701  OPC_2E_INEXACTEXCEPTION);
702  else Set_inexactflag();
703  return(OPC_2E_UNDERFLOWEXCEPTION);
704  }
705  else if (inexact && is_tiny) Set_underflowflag();
706  }
707  else Dbl_set_exponent(resultp1,result_exponent);
708  Dbl_copytoptr(resultp1,resultp2,dstptr);
709  if (inexact)
710  if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711  else Set_inexactflag();
712  return(NOEXCEPTION);
713 }
714 
715 /*
716  * Double Floating-point Multiply Negate Fused Add
717  */
718 
719 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
720 
721 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722 unsigned int *status;
723 {
724  unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725  register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726  unsigned int rightp1, rightp2, rightp3, rightp4;
727  unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728  register int mpy_exponent, add_exponent, count;
729  boolean inexact = FALSE, is_tiny = FALSE;
730 
731  unsigned int signlessleft1, signlessright1, save;
732  register int result_exponent, diff_exponent;
733  int sign_save, jumpsize;
734 
735  Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736  Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737  Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
738 
739  /*
740  * set sign bit of result of multiply
741  */
742  if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
743  Dbl_setzerop1(resultp1);
744  else
745  Dbl_setnegativezerop1(resultp1);
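 /*
 * (The only difference from dbl_fmpyfadd: the product sign is set to
 * the opposite value, so this routine computes -(src1*src2) + src3;
 * the remainder of the code mirrors dbl_fmpyfadd.)
 */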
746 
747  /*
748  * Generate multiply exponent
749  */
750  mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
751 
752  /*
753  * check first operand for NaN's or infinity
754  */
755  if (Dbl_isinfinity_exponent(opnd1p1)) {
756  if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757  if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758  Dbl_isnotnan(opnd3p1,opnd3p2)) {
759  if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
760  /*
761  * invalid since operands are infinity
762  * and zero
763  */
764  if (Is_invalidtrap_enabled())
765  return(OPC_2E_INVALIDEXCEPTION);
766  Set_invalidflag();
767  Dbl_makequietnan(resultp1,resultp2);
768  Dbl_copytoptr(resultp1,resultp2,dstptr);
769  return(NOEXCEPTION);
770  }
771  /*
772  * Check third operand for infinity with a
773  * sign opposite of the multiply result
774  */
775  if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776  (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
777  /*
778  * invalid since attempting a magnitude
779  * subtraction of infinities
780  */
781  if (Is_invalidtrap_enabled())
782  return(OPC_2E_INVALIDEXCEPTION);
783  Set_invalidflag();
784  Dbl_makequietnan(resultp1,resultp2);
785  Dbl_copytoptr(resultp1,resultp2,dstptr);
786  return(NOEXCEPTION);
787  }
788 
789  /*
790  * return infinity
791  */
792  Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793  Dbl_copytoptr(resultp1,resultp2,dstptr);
794  return(NOEXCEPTION);
795  }
796  }
797  else {
798  /*
799  * is NaN; signaling or quiet?
800  */
801  if (Dbl_isone_signaling(opnd1p1)) {
802  /* trap if INVALIDTRAP enabled */
803  if (Is_invalidtrap_enabled())
804  return(OPC_2E_INVALIDEXCEPTION);
805  /* make NaN quiet */
806  Set_invalidflag();
807  Dbl_set_quiet(opnd1p1);
808  }
809  /*
810  * is second operand a signaling NaN?
811  */
812  else if (Dbl_is_signalingnan(opnd2p1)) {
813  /* trap if INVALIDTRAP enabled */
814  if (Is_invalidtrap_enabled())
815  return(OPC_2E_INVALIDEXCEPTION);
816  /* make NaN quiet */
817  Set_invalidflag();
818  Dbl_set_quiet(opnd2p1);
819  Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820  return(NOEXCEPTION);
821  }
822  /*
823  * is third operand a signaling NaN?
824  */
825  else if (Dbl_is_signalingnan(opnd3p1)) {
826  /* trap if INVALIDTRAP enabled */
827  if (Is_invalidtrap_enabled())
828  return(OPC_2E_INVALIDEXCEPTION);
829  /* make NaN quiet */
830  Set_invalidflag();
831  Dbl_set_quiet(opnd3p1);
832  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833  return(NOEXCEPTION);
834  }
835  /*
836  * return quiet NaN
837  */
838  Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839  return(NOEXCEPTION);
840  }
841  }
842 
843  /*
844  * check second operand for NaN's or infinity
845  */
846  if (Dbl_isinfinity_exponent(opnd2p1)) {
847  if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848  if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849  if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
850  /*
851  * invalid since multiply operands are
852  * zero & infinity
853  */
854  if (Is_invalidtrap_enabled())
855  return(OPC_2E_INVALIDEXCEPTION);
856  Set_invalidflag();
857  Dbl_makequietnan(opnd2p1,opnd2p2);
858  Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859  return(NOEXCEPTION);
860  }
861 
862  /*
863  * Check third operand for infinity with a
864  * sign opposite of the multiply result
865  */
866  if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867  (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
868  /*
869  * invalid since attempting a magnitude
870  * subtraction of infinities
871  */
872  if (Is_invalidtrap_enabled())
873  return(OPC_2E_INVALIDEXCEPTION);
874  Set_invalidflag();
875  Dbl_makequietnan(resultp1,resultp2);
876  Dbl_copytoptr(resultp1,resultp2,dstptr);
877  return(NOEXCEPTION);
878  }
879 
880  /*
881  * return infinity
882  */
883  Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884  Dbl_copytoptr(resultp1,resultp2,dstptr);
885  return(NOEXCEPTION);
886  }
887  }
888  else {
889  /*
890  * is NaN; signaling or quiet?
891  */
892  if (Dbl_isone_signaling(opnd2p1)) {
893  /* trap if INVALIDTRAP enabled */
894  if (Is_invalidtrap_enabled())
895  return(OPC_2E_INVALIDEXCEPTION);
896  /* make NaN quiet */
897  Set_invalidflag();
898  Dbl_set_quiet(opnd2p1);
899  }
900  /*
901  * is third operand a signaling NaN?
902  */
903  else if (Dbl_is_signalingnan(opnd3p1)) {
904  /* trap if INVALIDTRAP enabled */
905  if (Is_invalidtrap_enabled())
906  return(OPC_2E_INVALIDEXCEPTION);
907  /* make NaN quiet */
908  Set_invalidflag();
909  Dbl_set_quiet(opnd3p1);
910  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911  return(NOEXCEPTION);
912  }
913  /*
914  * return quiet NaN
915  */
916  Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917  return(NOEXCEPTION);
918  }
919  }
920 
921  /*
922  * check third operand for NaN's or infinity
923  */
924  if (Dbl_isinfinity_exponent(opnd3p1)) {
925  if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926  /* return infinity */
927  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928  return(NOEXCEPTION);
929  } else {
930  /*
931  * is NaN; signaling or quiet?
932  */
933  if (Dbl_isone_signaling(opnd3p1)) {
934  /* trap if INVALIDTRAP enabled */
935  if (Is_invalidtrap_enabled())
936  return(OPC_2E_INVALIDEXCEPTION);
937  /* make NaN quiet */
938  Set_invalidflag();
939  Dbl_set_quiet(opnd3p1);
940  }
941  /*
942  * return quiet NaN
943  */
944  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945  return(NOEXCEPTION);
946  }
947  }
948 
949  /*
950  * Generate multiply mantissa
951  */
952  if (Dbl_isnotzero_exponent(opnd1p1)) {
953  /* set hidden bit */
954  Dbl_clear_signexponent_set_hidden(opnd1p1);
955  }
956  else {
957  /* check for zero */
958  if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
959  /*
960  * Perform the add opnd3 with zero here.
961  */
962  if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963  if (Is_rounding_mode(ROUNDMINUS)) {
964  Dbl_or_signs(opnd3p1,resultp1);
965  } else {
966  Dbl_and_signs(opnd3p1,resultp1);
967  }
968  }
969  /*
970  * Now let's check for trapped underflow case.
971  */
972  else if (Dbl_iszero_exponent(opnd3p1) &&
973  Is_underflowtrap_enabled()) {
974  /* need to normalize results mantissa */
975  sign_save = Dbl_signextendedsign(opnd3p1);
976  result_exponent = 0;
977  Dbl_leftshiftby1(opnd3p1,opnd3p2);
978  Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979  Dbl_set_sign(opnd3p1,/*using*/sign_save);
980  Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981  unfl);
982  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983  /* inexact = FALSE */
984  return(OPC_2E_UNDERFLOWEXCEPTION);
985  }
986  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987  return(NOEXCEPTION);
988  }
989  /* is denormalized, adjust exponent */
990  Dbl_clear_signexponent(opnd1p1);
991  Dbl_leftshiftby1(opnd1p1,opnd1p2);
992  Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
993  }
994  /* opnd2 needs to have hidden bit set with msb in hidden bit */
995  if (Dbl_isnotzero_exponent(opnd2p1)) {
996  Dbl_clear_signexponent_set_hidden(opnd2p1);
997  }
998  else {
999  /* check for zero */
1000  if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001  /*
1002  * Perform the add opnd3 with zero here.
1003  */
1004  if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005  if (Is_rounding_mode(ROUNDMINUS)) {
1006  Dbl_or_signs(opnd3p1,resultp1);
1007  } else {
1008  Dbl_and_signs(opnd3p1,resultp1);
1009  }
1010  }
1011  /*
1012  * Now let's check for trapped underflow case.
1013  */
1014  else if (Dbl_iszero_exponent(opnd3p1) &&
1015  Is_underflowtrap_enabled()) {
1016  /* need to normalize results mantissa */
1017  sign_save = Dbl_signextendedsign(opnd3p1);
1018  result_exponent = 0;
1019  Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020  Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021  Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022  Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023  unfl);
1024  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025  /* inexact = FALSE */
1026  return(OPC_2E_UNDERFLOWEXCEPTION);
1027  }
1028  Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029  return(NOEXCEPTION);
1030  }
1031  /* is denormalized; want to normalize */
1032  Dbl_clear_signexponent(opnd2p1);
1033  Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034  Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035  }
1036 
1037  /* Multiply the first two source mantissas together */
1038 
1039  /*
1040  * The intermediate result will be kept in tmpres,
1041  * which needs enough room for 106 bits of mantissa,
1042  * so lets call it a Double extended.
1043  */
1044  Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045 
1046  /*
1047  * Four bits at a time are inspected in each loop, and a
1048  * simple shift and add multiply algorithm is used.
1049  */
1050  for (count = DBL_P-1; count >= 0; count -= 4) {
1051  Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052  if (Dbit28p2(opnd1p2)) {
1053  /* Fourword_add should be an ADD followed by 3 ADDC's */
1054  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1055  opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056  }
1057  if (Dbit29p2(opnd1p2)) {
1058  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059  opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060  }
1061  if (Dbit30p2(opnd1p2)) {
1062  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063  opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064  }
1065  if (Dbit31p2(opnd1p2)) {
1066  Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067  opnd2p1, opnd2p2, 0, 0);
1068  }
1069  Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070  }
1071  if (Is_dexthiddenoverflow(tmpresp1)) {
1072  /* result mantissa >= 2 (mantissa overflow) */
1073  mpy_exponent++;
1074  Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075  }
1076 
1077  /*
1078  * Restore the sign of the mpy result which was saved in resultp1.
1079  * The exponent will continue to be kept in mpy_exponent.
1080  */
1081  Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082 
1083  /*
1084  * No rounding is required, since the result of the multiply
1085  * is exact in the extended format.
1086  */
1087 
1088  /*
1089  * Now we are ready to perform the add portion of the operation.
1090  *
1091  * The exponents need to be kept as integers for now, since the
1092  * multiply result might not fit into the exponent field. We
1093  * can't overflow or underflow because of this yet, since the
1094  * add could bring the final result back into range.
1095  */
1096  add_exponent = Dbl_exponent(opnd3p1);
1097 
1098  /*
1099  * Check for denormalized or zero add operand.
1100  */
1101  if (add_exponent == 0) {
1102  /* check for zero */
1103  if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104  /* right is zero */
1105  /* Left can't be zero and must be result.
1106  *
1107  * The final result is now in tmpres and mpy_exponent,
1108  * and needs to be rounded and squeezed back into
1109  * double precision format from double extended.
1110  */
1111  result_exponent = mpy_exponent;
1112  Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113  resultp1,resultp2,resultp3,resultp4);
1114  sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115  goto round;
1116  }
1117 
1118  /*
1119  * Neither are zeroes.
1120  * Adjust exponent and normalize add operand.
1121  */
1122  sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
1123  Dbl_clear_signexponent(opnd3p1);
1124  Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125  Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126  Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
1127  } else {
1128  Dbl_clear_exponent_set_hidden(opnd3p1);
1129  }
1130  /*
1131  * Copy opnd3 to the double extended variable called right.
1132  */
1133  Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134 
1135  /*
1136  * A zero "save" helps discover equal operands (for later),
1137  * and is used in swapping operands (if needed).
1138  */
1139  Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140 
1141  /*
1142  * Compare magnitude of operands.
1143  */
1144  Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145  Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146  if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147  Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148  /*
1149  * Set the left operand to the larger one by XOR swap.
1150  * First finish the first word "save".
1151  */
1152  Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153  Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154  Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155  rightp2,rightp3,rightp4);
1156  /* also setup exponents used in rest of routine */
1157  diff_exponent = add_exponent - mpy_exponent;
1158  result_exponent = add_exponent;
1159  } else {
1160  /* also setup exponents used in rest of routine */
1161  diff_exponent = mpy_exponent - add_exponent;
1162  result_exponent = mpy_exponent;
1163  }
1164  /* Invariant: left is not smaller than right. */
1165 
1166  /*
1167  * Special case alignment of operands that would force alignment
1168  * beyond the extent of the extension. A further optimization
1169  * could special case this but only reduces the path length for
1170  * this infrequent case.
1171  */
1172  if (diff_exponent > DBLEXT_THRESHOLD) {
1173  diff_exponent = DBLEXT_THRESHOLD;
1174  }
1175 
1176  /* Align right operand by shifting it to the right */
1177  Dblext_clear_sign(rightp1);
1178  Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179  /*shifted by*/diff_exponent);
1180 
1181  /* Treat sum and difference of the operands separately. */
1182  if ((int)save < 0) {
1183  /*
1184  * Difference of the two operands. Overflow can occur if the
1185  * multiply overflowed. A borrow can occur out of the hidden
1186  * bit and force a post normalization phase.
1187  */
1188  Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189  rightp1,rightp2,rightp3,rightp4,
1190  resultp1,resultp2,resultp3,resultp4);
1191  sign_save = Dbl_signextendedsign(resultp1);
1192  if (Dbl_iszero_hidden(resultp1)) {
1193  /* Handle normalization */
1194  /* A straightforward algorithm would now shift the
1195  * result and extension left until the hidden bit
1196  * becomes one. Not all of the extension bits need
1197  * participate in the shift. Only the two most
1198  * significant bits (round and guard) are needed.
1199  * If only a single shift is needed then the guard
1200  * bit becomes a significant low order bit and the
1201  * extension must participate in the rounding.
1202  * If more than a single shift is needed, then all
1203  * bits to the right of the guard bit are zeros,
1204  * and the guard bit may or may not be zero. */
1205  Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206  resultp4);
1207 
1208  /* Need to check for a zero result. The sign and
1209  * exponent fields have already been zeroed. The more
1210  * efficient test of the full object can be used.
1211  */
1212  if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213  /* Must have been "x-x" or "x+(-x)". */
1214  if (Is_rounding_mode(ROUNDMINUS))
1215  Dbl_setone_sign(resultp1);
1216  Dbl_copytoptr(resultp1,resultp2,dstptr);
1217  return(NOEXCEPTION);
1218  }
1219  result_exponent--;
1220 
1221  /* Look to see if normalization is finished. */
1222  if (Dbl_isone_hidden(resultp1)) {
1223  /* No further normalization is needed */
1224  goto round;
1225  }
1226 
1227  /* Discover first one bit to determine shift amount.
1228  * Use a modified binary search. We have already
1229  * shifted the result one position right and still
1230  * not found a one so the remainder of the extension
1231  * must be zero and simplifies rounding. */
1232  /* Scan bytes */
1233  while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234  Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235  result_exponent -= 8;
1236  }
1237  /* Now narrow it down to the nibble */
1238  if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239  /* The lower nibble contains the
1240  * normalizing one */
1241  Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242  result_exponent -= 4;
1243  }
1244  /* Select case where first bit is set (already
1245  * normalized) otherwise select the proper shift. */
1246  jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247  if (jumpsize <= 7) switch(jumpsize) {
1248  case 1:
1249  Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250  resultp4);
1251  result_exponent -= 3;
1252  break;
1253  case 2:
1254  case 3:
1255  Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256  resultp4);
1257  result_exponent -= 2;
1258  break;
1259  case 4:
1260  case 5:
1261  case 6:
1262  case 7:
1263  Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264  resultp4);
1265  result_exponent -= 1;
1266  break;
1267  }
1268  } /* end if (hidden...)... */
1269  /* Fall through and round */
1270  } /* end if (save < 0)... */
1271  else {
1272  /* Add magnitudes */
1273  Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274  rightp1,rightp2,rightp3,rightp4,
1275  /*to*/resultp1,resultp2,resultp3,resultp4);
1276  sign_save = Dbl_signextendedsign(resultp1);
1277  if (Dbl_isone_hiddenoverflow(resultp1)) {
1278  /* Prenormalization required. */
1279  Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280  resultp4);
1281  result_exponent++;
1282  } /* end if hiddenoverflow... */
1283  } /* end else ...add magnitudes... */
1284 
1285  /* Round the result. If the extension and lower two words are
1286  * all zeros, then the result is exact. Otherwise round in the
1287  * correct direction. Underflow is possible. If a postnormalization
1288  * is necessary, then the mantissa is all zeros so no shift is needed.
1289  */
1290  round:
1291  if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292  Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293  result_exponent,is_tiny);
1294  }
1295  Dbl_set_sign(resultp1,/*using*/sign_save);
1296  if (Dblext_isnotzero_mantissap3(resultp3) ||
1297  Dblext_isnotzero_mantissap4(resultp4)) {
1298  inexact = TRUE;
1299  switch(Rounding_mode()) {
1300  case ROUNDNEAREST: /* The default. */
1301  if (Dblext_isone_highp3(resultp3)) {
1302  /* at least 1/2 ulp */
1303  if (Dblext_isnotzero_low31p3(resultp3) ||
1304  Dblext_isnotzero_mantissap4(resultp4) ||
1305  Dblext_isone_lowp2(resultp2)) {
1306  /* either exactly half way and odd or
1307  * more than 1/2ulp */
1308  Dbl_increment(resultp1,resultp2);
1309  }
1310  }
1311  break;
1312 
1313  case ROUNDPLUS:
1314  if (Dbl_iszero_sign(resultp1)) {
1315  /* Round up positive results */
1316  Dbl_increment(resultp1,resultp2);
1317  }
1318  break;
1319 
1320  case ROUNDMINUS:
1321  if (Dbl_isone_sign(resultp1)) {
1322  /* Round down negative results */
1323  Dbl_increment(resultp1,resultp2);
1324  }
1325 
1326  case ROUNDZERO:;
1327  /* truncate is simple */
1328  } /* end switch... */
1329  if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330  }
1331  if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332  /* Overflow */
1333  if (Is_overflowtrap_enabled()) {
1334  /*
1335  * Adjust bias of result
1336  */
1337  Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338  Dbl_copytoptr(resultp1,resultp2,dstptr);
1339  if (inexact)
1340  if (Is_inexacttrap_enabled())
1341  return (OPC_2E_OVERFLOWEXCEPTION |
1342  OPC_2E_INEXACTEXCEPTION);
1343  else Set_inexactflag();
1344  return (OPC_2E_OVERFLOWEXCEPTION);
1345  }
1346  inexact = TRUE;
1347  Set_overflowflag();
1348  Dbl_setoverflow(resultp1,resultp2);
1349  } else if (result_exponent <= 0) { /* underflow case */
1350  if (Is_underflowtrap_enabled()) {
1351  /*
1352  * Adjust bias of result
1353  */
1354  Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355  Dbl_copytoptr(resultp1,resultp2,dstptr);
1356  if (inexact)
1357  if (Is_inexacttrap_enabled())
1358  return (OPC_2E_UNDERFLOWEXCEPTION |
1359  OPC_2E_INEXACTEXCEPTION);
1360  else Set_inexactflag();
1361  return(OPC_2E_UNDERFLOWEXCEPTION);
1362  }
1363  else if (inexact && is_tiny) Set_underflowflag();
1364  }
1365  else Dbl_set_exponent(resultp1,result_exponent);
1366  Dbl_copytoptr(resultp1,resultp2,dstptr);
1367  if (inexact)
1368  if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369  else Set_inexactflag();
1370  return(NOEXCEPTION);
1371 }
1372 
1373 /*
1374  * Single Floating-point Multiply Fused Add
1375  */
1376 
1377 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378 
1379 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380 unsigned int *status;
1381 {
1382  unsigned int opnd1, opnd2, opnd3;
1383  register unsigned int tmpresp1, tmpresp2;
1384  unsigned int rightp1, rightp2;
1385  unsigned int resultp1, resultp2 = 0;
1386  register int mpy_exponent, add_exponent, count;
1387  boolean inexact = FALSE, is_tiny = FALSE;
1388 
1389  unsigned int signlessleft1, signlessright1, save;
1390  register int result_exponent, diff_exponent;
1391  int sign_save, jumpsize;
1392 
1393  Sgl_copyfromptr(src1ptr,opnd1);
1394  Sgl_copyfromptr(src2ptr,opnd2);
1395  Sgl_copyfromptr(src3ptr,opnd3);
1396 
1397  /*
1398  * set sign bit of result of multiply
1399  */
1400  if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1401  Sgl_setnegativezero(resultp1);
1402  else Sgl_setzero(resultp1);
1403 
1404  /*
1405  * Generate multiply exponent
1406  */
1407  mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408 
1409  /*
1410  * check first operand for NaN's or infinity
1411  */
1412  if (Sgl_isinfinity_exponent(opnd1)) {
1413  if (Sgl_iszero_mantissa(opnd1)) {
1414  if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415  if (Sgl_iszero_exponentmantissa(opnd2)) {
1416  /*
1417  * invalid since operands are infinity
1418  * and zero
1419  */
1420  if (Is_invalidtrap_enabled())
1421  return(OPC_2E_INVALIDEXCEPTION);
1422  Set_invalidflag();
1423  Sgl_makequietnan(resultp1);
1424  Sgl_copytoptr(resultp1,dstptr);
1425  return(NOEXCEPTION);
1426  }
1427  /*
1428  * Check third operand for infinity with a
1429  * sign opposite of the multiply result
1430  */
1431  if (Sgl_isinfinity(opnd3) &&
1432  (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433  /*
1434  * invalid since attempting a magnitude
1435  * subtraction of infinities
1436  */
1437  if (Is_invalidtrap_enabled())
1438  return(OPC_2E_INVALIDEXCEPTION);
1439  Set_invalidflag();
1440  Sgl_makequietnan(resultp1);
1441  Sgl_copytoptr(resultp1,dstptr);
1442  return(NOEXCEPTION);
1443  }
1444 
1445  /*
1446  * return infinity
1447  */
1448  Sgl_setinfinity_exponentmantissa(resultp1);
1449  Sgl_copytoptr(resultp1,dstptr);
1450  return(NOEXCEPTION);
1451  }
1452  }
1453  else {
1454  /*
1455  * is NaN; signaling or quiet?
1456  */
1457  if (Sgl_isone_signaling(opnd1)) {
1458  /* trap if INVALIDTRAP enabled */
1459  if (Is_invalidtrap_enabled())
1460  return(OPC_2E_INVALIDEXCEPTION);
1461  /* make NaN quiet */
1462  Set_invalidflag();
1463  Sgl_set_quiet(opnd1);
1464  }
1465  /*
1466  * is second operand a signaling NaN?
1467  */
1468  else if (Sgl_is_signalingnan(opnd2)) {
1469  /* trap if INVALIDTRAP enabled */
1470  if (Is_invalidtrap_enabled())
1471  return(OPC_2E_INVALIDEXCEPTION);
1472  /* make NaN quiet */
1473  Set_invalidflag();
1474  Sgl_set_quiet(opnd2);
1475  Sgl_copytoptr(opnd2,dstptr);
1476  return(NOEXCEPTION);
1477  }
1478  /*
1479  * is third operand a signaling NaN?
1480  */
1481  else if (Sgl_is_signalingnan(opnd3)) {
1482  /* trap if INVALIDTRAP enabled */
1483  if (Is_invalidtrap_enabled())
1484  return(OPC_2E_INVALIDEXCEPTION);
1485  /* make NaN quiet */
1486  Set_invalidflag();
1487  Sgl_set_quiet(opnd3);
1488  Sgl_copytoptr(opnd3,dstptr);
1489  return(NOEXCEPTION);
1490  }
1491  /*
1492  * return quiet NaN
1493  */
1494  Sgl_copytoptr(opnd1,dstptr);
1495  return(NOEXCEPTION);
1496  }
1497  }
1498 
1499  /*
1500  * check second operand for NaN's or infinity
1501  */
1502  if (Sgl_isinfinity_exponent(opnd2)) {
1503  if (Sgl_iszero_mantissa(opnd2)) {
1504  if (Sgl_isnotnan(opnd3)) {
1505  if (Sgl_iszero_exponentmantissa(opnd1)) {
1506  /*
1507  * invalid since multiply operands are
1508  * zero & infinity
1509  */
1510  if (Is_invalidtrap_enabled())
1511  return(OPC_2E_INVALIDEXCEPTION);
1512  Set_invalidflag();
1513  Sgl_makequietnan(opnd2);
1514  Sgl_copytoptr(opnd2,dstptr);
1515  return(NOEXCEPTION);
1516  }
1517 
1518  /*
1519  * Check third operand for infinity with a
1520  * sign opposite of the multiply result
1521  */
1522  if (Sgl_isinfinity(opnd3) &&
1523  (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524  /*
1525  * invalid since attempting a magnitude
1526  * subtraction of infinities
1527  */
1528  if (Is_invalidtrap_enabled())
1529  return(OPC_2E_INVALIDEXCEPTION);
1530  Set_invalidflag();
1531  Sgl_makequietnan(resultp1);
1532  Sgl_copytoptr(resultp1,dstptr);
1533  return(NOEXCEPTION);
1534  }
1535 
1536  /*
1537  * return infinity
1538  */
1539  Sgl_setinfinity_exponentmantissa(resultp1);
1540  Sgl_copytoptr(resultp1,dstptr);
1541  return(NOEXCEPTION);
1542  }
1543  }
1544  else {
1545  /*
1546  * is NaN; signaling or quiet?
1547  */
1548  if (Sgl_isone_signaling(opnd2)) {
1549  /* trap if INVALIDTRAP enabled */
1550  if (Is_invalidtrap_enabled())
1551  return(OPC_2E_INVALIDEXCEPTION);
1552  /* make NaN quiet */
1553  Set_invalidflag();
1554  Sgl_set_quiet(opnd2);
1555  }
1556  /*
1557  * is third operand a signaling NaN?
1558  */
1559  else if (Sgl_is_signalingnan(opnd3)) {
1560  /* trap if INVALIDTRAP enabled */
1561  if (Is_invalidtrap_enabled())
1562  return(OPC_2E_INVALIDEXCEPTION);
1563  /* make NaN quiet */
1564  Set_invalidflag();
1565  Sgl_set_quiet(opnd3);
1566  Sgl_copytoptr(opnd3,dstptr);
1567  return(NOEXCEPTION);
1568  }
1569  /*
1570  * return quiet NaN
1571  */
1572  Sgl_copytoptr(opnd2,dstptr);
1573  return(NOEXCEPTION);
1574  }
1575  }
1576 
1577  /*
1578  * check third operand for NaN's or infinity
1579  */
1580  if (Sgl_isinfinity_exponent(opnd3)) {
1581  if (Sgl_iszero_mantissa(opnd3)) {
1582  /* return infinity */
1583  Sgl_copytoptr(opnd3,dstptr);
1584  return(NOEXCEPTION);
1585  } else {
1586  /*
1587  * is NaN; signaling or quiet?
1588  */
1589  if (Sgl_isone_signaling(opnd3)) {
1590  /* trap if INVALIDTRAP enabled */
1591  if (Is_invalidtrap_enabled())
1592  return(OPC_2E_INVALIDEXCEPTION);
1593  /* make NaN quiet */
1594  Set_invalidflag();
1595  Sgl_set_quiet(opnd3);
1596  }
1597  /*
1598  * return quiet NaN
1599  */
1600  Sgl_copytoptr(opnd3,dstptr);
1601  return(NOEXCEPTION);
1602  }
1603  }
1604 
1605  /*
1606  * Generate multiply mantissa
1607  */
1608  if (Sgl_isnotzero_exponent(opnd1)) {
1609  /* set hidden bit */
1610  Sgl_clear_signexponent_set_hidden(opnd1);
1611  }
1612  else {
1613  /* check for zero */
1614  if (Sgl_iszero_mantissa(opnd1)) {
1615  /*
1616  * Perform the add opnd3 with zero here.
1617  */
1618  if (Sgl_iszero_exponentmantissa(opnd3)) {
1619  if (Is_rounding_mode(ROUNDMINUS)) {
1620  Sgl_or_signs(opnd3,resultp1);
1621  } else {
1622  Sgl_and_signs(opnd3,resultp1);
1623  }
1624  }
1625  /*
1626  * Now let's check for trapped underflow case.
1627  */
1628  else if (Sgl_iszero_exponent(opnd3) &&
1629  Is_underflowtrap_enabled()) {
1630  /* need to normalize results mantissa */
1631  sign_save = Sgl_signextendedsign(opnd3);
1632  result_exponent = 0;
1633  Sgl_leftshiftby1(opnd3);
1634  Sgl_normalize(opnd3,result_exponent);
1635  Sgl_set_sign(opnd3,/*using*/sign_save);
1636  Sgl_setwrapped_exponent(opnd3,result_exponent,
1637  unfl);
1638  Sgl_copytoptr(opnd3,dstptr);
1639  /* inexact = FALSE */
1640  return(OPC_2E_UNDERFLOWEXCEPTION);
1641  }
1642  Sgl_copytoptr(opnd3,dstptr);
1643  return(NOEXCEPTION);
1644  }
1645  /* is denormalized, adjust exponent */
1646  Sgl_clear_signexponent(opnd1);
1647  Sgl_leftshiftby1(opnd1);
1648  Sgl_normalize(opnd1,mpy_exponent);
1649  }
1650  /* opnd2 needs to have hidden bit set with msb in hidden bit */
1651  if (Sgl_isnotzero_exponent(opnd2)) {
1652  Sgl_clear_signexponent_set_hidden(opnd2);
1653  }
1654  else {
1655  /* check for zero */
1656  if (Sgl_iszero_mantissa(opnd2)) {
1657  /*
1658  * Perform the add opnd3 with zero here.
1659  */
1660  if (Sgl_iszero_exponentmantissa(opnd3)) {
1661  if (Is_rounding_mode(ROUNDMINUS)) {
1662  Sgl_or_signs(opnd3,resultp1);
1663  } else {
1664  Sgl_and_signs(opnd3,resultp1);
1665  }
1666  }
1667  /*
1668  * Now let's check for trapped underflow case.
1669  */
1670  else if (Sgl_iszero_exponent(opnd3) &&
1671  Is_underflowtrap_enabled()) {
1672  /* need to normalize results mantissa */
1673  sign_save = Sgl_signextendedsign(opnd3);
1674  result_exponent = 0;
1675  Sgl_leftshiftby1(opnd3);
1676  Sgl_normalize(opnd3,result_exponent);
1677  Sgl_set_sign(opnd3,/*using*/sign_save);
1678  Sgl_setwrapped_exponent(opnd3,result_exponent,
1679  unfl);
1680  Sgl_copytoptr(opnd3,dstptr);
1681  /* inexact = FALSE */
1682  return(OPC_2E_UNDERFLOWEXCEPTION);
1683  }
1684  Sgl_copytoptr(opnd3,dstptr);
1685  return(NOEXCEPTION);
1686  }
1687  /* is denormalized; want to normalize */
1688  Sgl_clear_signexponent(opnd2);
1689  Sgl_leftshiftby1(opnd2);
1690  Sgl_normalize(opnd2,mpy_exponent);
1691  }
1692 
1693  /* Multiply the first two source mantissas together */
1694 
1695  /*
1696  * The intermediate result will be kept in tmpres,
1697  * which needs enough room for 48 bits of mantissa,
1698  * so lets call it a Single extended.
1699  */
1700  Sglext_setzero(tmpresp1,tmpresp2);
1701 
1702  /*
1703  * Four bits at a time are inspected in each loop, and a
1704  * simple shift and add multiply algorithm is used.
1705  */
1706  for (count = SGL_P-1; count >= 0; count -= 4) {
1707  Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708  if (Sbit28(opnd1)) {
1709  /* Twoword_add should be an ADD followed by 2 ADDC's */
1710  Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711  }
1712  if (Sbit29(opnd1)) {
1713  Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714  }
1715  if (Sbit30(opnd1)) {
1716  Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717  }
1718  if (Sbit31(opnd1)) {
1719  Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720  }
1721  Sgl_rightshiftby4(opnd1);
1722  }
1723  if (Is_sexthiddenoverflow(tmpresp1)) {
1724  /* result mantissa >= 2 (mantissa overflow) */
1725  mpy_exponent++;
1726  Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727  } else {
1728  Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729  }
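 /*
 * (Unlike the double-precision routines, the single-precision product
 * gets this extra right shift, 3 bits normally or 4 with the exponent
 * bumped when the hidden position overflowed, so the 48-bit product
 * lines up with the single-extended format before the add stage.)
 */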
1730 
1731  /*
1732  * Restore the sign of the mpy result which was saved in resultp1.
1733  * The exponent will continue to be kept in mpy_exponent.
1734  */
1735  Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736 
1737  /*
1738  * No rounding is required, since the result of the multiply
1739  * is exact in the extended format.
1740  */
1741 
1742  /*
1743  * Now we are ready to perform the add portion of the operation.
1744  *
1745  * The exponents need to be kept as integers for now, since the
1746  * multiply result might not fit into the exponent field. We
1747  * can't overflow or underflow because of this yet, since the
1748  * add could bring the final result back into range.
1749  */
1750  add_exponent = Sgl_exponent(opnd3);
1751 
1752  /*
1753  * Check for denormalized or zero add operand.
1754  */
1755  if (add_exponent == 0) {
1756  /* check for zero */
1757  if (Sgl_iszero_mantissa(opnd3)) {
1758  /* right is zero */
1759  /* Left can't be zero and must be result.
1760  *
1761  * The final result is now in tmpres and mpy_exponent,
1762  * and needs to be rounded and squeezed back into
1763  * single precision format from single extended.
1764  */
1765  result_exponent = mpy_exponent;
1766  Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767  sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768  goto round;
1769  }
1770 
1771  /*
1772  * Neither are zeroes.
1773  * Adjust exponent and normalize add operand.
1774  */
1775  sign_save = Sgl_signextendedsign(opnd3); /* save sign */
1776  Sgl_clear_signexponent(opnd3);
1777  Sgl_leftshiftby1(opnd3);
1778  Sgl_normalize(opnd3,add_exponent);
1779  Sgl_set_sign(opnd3,sign_save); /* restore sign */
1780  } else {
1781  Sgl_clear_exponent_set_hidden(opnd3);
1782  }
1783  /*
1784  * Copy opnd3 to the single extended variable called right.
1785  */
1786  Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787 
1788  /*
1789  * A zero "save" helps discover equal operands (for later),
1790  * and is used in swapping operands (if needed).
1791  */
1792  Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793 
1794  /*
1795  * Compare magnitude of operands.
1796  */
1797  Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798  Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799  if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800  Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801  /*
1802  * Set the left operand to the larger one by XOR swap.
1803  * First finish the first word "save".
1804  */
1805  Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806  Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807  Sglext_swap_lower(tmpresp2,rightp2);
1808  /* also setup exponents used in rest of routine */
1809  diff_exponent = add_exponent - mpy_exponent;
1810  result_exponent = add_exponent;
1811  } else {
1812  /* also setup exponents used in rest of routine */
1813  diff_exponent = mpy_exponent - add_exponent;
1814  result_exponent = mpy_exponent;
1815  }
1816  /* Invariant: left is not smaller than right. */
1817 
1818  /*
1819  * Special case alignment of operands that would force alignment
1820  * beyond the extent of the extension. A further optimization
1821  * could special case this but only reduces the path length for
1822  * this infrequent case.
1823  */
1824  if (diff_exponent > SGLEXT_THRESHOLD) {
1825  diff_exponent = SGLEXT_THRESHOLD;
1826  }
1827 
1828  /* Align right operand by shifting it to the right */
1829  Sglext_clear_sign(rightp1);
1830  Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831 
1832  /* Treat sum and difference of the operands separately. */
1833  if ((int)save < 0) {
1834  /*
1835  * Difference of the two operands. Overflow can occur if the
1836  * multiply overflowed. A borrow can occur out of the hidden
1837  * bit and force a post normalization phase.
1838  */
1839  Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840  resultp1,resultp2);
1841  sign_save = Sgl_signextendedsign(resultp1);
1842  if (Sgl_iszero_hidden(resultp1)) {
1843  /* Handle normalization */
1844  /* A straightforward algorithm would now shift the
1845  * result and extension left until the hidden bit
1846  * becomes one. Not all of the extension bits need
1847  * participate in the shift. Only the two most
1848  * significant bits (round and guard) are needed.
1849  * If only a single shift is needed then the guard
1850  * bit becomes a significant low order bit and the
1851  * extension must participate in the rounding.
1852  * If more than a single shift is needed, then all
1853  * bits to the right of the guard bit are zeros,
1854  * and the guard bit may or may not be zero. */
1855  Sglext_leftshiftby1(resultp1,resultp2);
1856 
1857  /* Need to check for a zero result. The sign and
1858  * exponent fields have already been zeroed. The more
1859  * efficient test of the full object can be used.
1860  */
1861  if (Sglext_iszero(resultp1,resultp2)) {
1862  /* Must have been "x-x" or "x+(-x)". */
1863  if (Is_rounding_mode(ROUNDMINUS))
1864  Sgl_setone_sign(resultp1);
1865  Sgl_copytoptr(resultp1,dstptr);
1866  return(NOEXCEPTION);
1867  }
1868  result_exponent--;
1869 
1870  /* Look to see if normalization is finished. */
1871  if (Sgl_isone_hidden(resultp1)) {
1872  /* No further normalization is needed */
1873  goto round;
1874  }
1875 
1876  /* Discover first one bit to determine shift amount.
1877  * Use a modified binary search. We have already
1878  * shifted the result one position right and still
1879  * not found a one so the remainder of the extension
1880  * must be zero and simplifies rounding. */
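 /*
  * Worked example (illustrative): suppose that, after the single left
  * shift above, the leading one bit sits 10 places below the
  * hidden-bit position.  The byte scan below shifts left by 8
  * (result_exponent -= 8), the nibble test then sees a one within the
  * top four bits and does not fire, and jumpsize evaluates to 2,
  * selecting the leftshiftby2 case (result_exponent -= 2); the leading
  * one is back in the hidden position after a total shift of 10.
  */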
1881  /* Scan bytes */
1882  while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883  Sglext_leftshiftby8(resultp1,resultp2);
1884  result_exponent -= 8;
1885  }
1886  /* Now narrow it down to the nibble */
1887  if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888  /* The lower nibble contains the
1889  * normalizing one */
1890  Sglext_leftshiftby4(resultp1,resultp2);
1891  result_exponent -= 4;
1892  }
1893  /* Select the case where the first bit is set (already
1894  * normalized); otherwise select the proper shift. */
1895  jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896  if (jumpsize <= 7) switch(jumpsize) {
1897  case 1:
1898  Sglext_leftshiftby3(resultp1,resultp2);
1899  result_exponent -= 3;
1900  break;
1901  case 2:
1902  case 3:
1903  Sglext_leftshiftby2(resultp1,resultp2);
1904  result_exponent -= 2;
1905  break;
1906  case 4:
1907  case 5:
1908  case 6:
1909  case 7:
1910  Sglext_leftshiftby1(resultp1,resultp2);
1911  result_exponent -= 1;
1912  break;
1913  }
1914  } /* end if (hidden...)... */
1915  /* Fall through and round */
1916  } /* end if (save < 0)... */
1917  else {
1918  /* Add magnitudes */
1919  Sglext_addition(tmpresp1,tmpresp2,
1920  rightp1,rightp2, /*to*/resultp1,resultp2);
1921  sign_save = Sgl_signextendedsign(resultp1);
1922  if (Sgl_isone_hiddenoverflow(resultp1)) {
1923  /* Prenormalization required. */
1924  Sglext_arithrightshiftby1(resultp1,resultp2);
1925  result_exponent++;
1926  } /* end if hiddenoverflow... */
1927  } /* end else ...add magnitudes... */
1928 
1929  /* Round the result. If the extension word (resultp2) is
1930  * all zeros, then the result is exact. Otherwise round in the
1931  * correct direction. Underflow is possible. If a postnormalization
1932  * is necessary, then the mantissa is all zeros so no shift is needed.
1933  */
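 /*
  * Sketch of the round-to-nearest test below (illustrative): the
  * extension word resultp2 plays the role of the discarded bits.  Its
  * high bit set means the discarded part is at least 1/2 ulp; the
  * result is then incremented either when any lower extension bit is
  * also set (strictly more than 1/2 ulp) or when the kept mantissa is
  * odd (ties round to even), which is the
  * Sglext_isone_highp2 / Sglext_isnotzero_low31p2 / Sglext_isone_lowp1
  * combination used in the ROUNDNEAREST case.
  */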
1934  round:
1935  if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936  Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937  }
1938  Sgl_set_sign(resultp1,/*using*/sign_save);
1939  if (Sglext_isnotzero_mantissap2(resultp2)) {
1940  inexact = TRUE;
1941  switch(Rounding_mode()) {
1942  case ROUNDNEAREST: /* The default. */
1943  if (Sglext_isone_highp2(resultp2)) {
1944  /* at least 1/2 ulp */
1945  if (Sglext_isnotzero_low31p2(resultp2) ||
1946  Sglext_isone_lowp1(resultp1)) {
1947  /* either exactly half way and odd or
1948  * more than 1/2ulp */
1949  Sgl_increment(resultp1);
1950  }
1951  }
1952  break;
1953 
1954  case ROUNDPLUS:
1955  if (Sgl_iszero_sign(resultp1)) {
1956  /* Round up positive results */
1957  Sgl_increment(resultp1);
1958  }
1959  break;
1960 
1961  case ROUNDMINUS:
1962  if (Sgl_isone_sign(resultp1)) {
1963  /* Round down negative results */
1964  Sgl_increment(resultp1);
1965  }
1966 
1967  case ROUNDZERO:;
1968  /* truncate is simple */
1969  } /* end switch... */
1970  if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971  }
1972  if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973  /* Overflow */
1974  if (Is_overflowtrap_enabled()) {
1975  /*
1976  * Adjust bias of result
1977  */
1978  Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979  Sgl_copytoptr(resultp1,dstptr);
1980  if (inexact)
1981  if (Is_inexacttrap_enabled())
1982  return (OPC_2E_OVERFLOWEXCEPTION |
1983  OPC_2E_INEXACTEXCEPTION);
1984  else Set_inexactflag();
1985  return (OPC_2E_OVERFLOWEXCEPTION);
1986  }
1987  inexact = TRUE;
1988  Set_overflowflag();
1989  Sgl_setoverflow(resultp1);
1990  } else if (result_exponent <= 0) { /* underflow case */
1991  if (Is_underflowtrap_enabled()) {
1992  /*
1993  * Adjust bias of result
1994  */
1995  Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996  Sgl_copytoptr(resultp1,dstptr);
1997  if (inexact)
1998  if (Is_inexacttrap_enabled())
1999  return (OPC_2E_UNDERFLOWEXCEPTION |
2000  OPC_2E_INEXACTEXCEPTION);
2001  else Set_inexactflag();
2002  return(OPC_2E_UNDERFLOWEXCEPTION);
2003  }
2004  else if (inexact && is_tiny) Set_underflowflag();
2005  }
2006  else Sgl_set_exponent(resultp1,result_exponent);
2007  Sgl_copytoptr(resultp1,dstptr);
2008  if (inexact)
2009  if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010  else Set_inexactflag();
2011  return(NOEXCEPTION);
2012 }
2013 
2014 /*
2015  * Single Floating-point Multiply Negate Fused Add
2016  */
2017 
2018 int sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019 
2020 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021 unsigned int *status;
2022 {
2023  unsigned int opnd1, opnd2, opnd3;
2024  register unsigned int tmpresp1, tmpresp2;
2025  unsigned int rightp1, rightp2;
2026  unsigned int resultp1, resultp2 = 0;
2027  register int mpy_exponent, add_exponent, count;
2028  boolean inexact = FALSE, is_tiny = FALSE;
2029 
2030  unsigned int signlessleft1, signlessright1, save;
2031  register int result_exponent, diff_exponent;
2032  int sign_save, jumpsize;
2033 
2034  Sgl_copyfromptr(src1ptr,opnd1);
2035  Sgl_copyfromptr(src2ptr,opnd2);
2036  Sgl_copyfromptr(src3ptr,opnd3);
2037 
2038  /*
2039  * set sign bit of result of multiply
2040  */
2041  if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2042  Sgl_setzero(resultp1);
2043  else
2044  Sgl_setnegativezero(resultp1);
2045 
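 /*
  * Note (illustrative): this is the multiply *negate* fused add, i.e.
  * -(src1*src2)+src3, so the product sign set here is the inverse of
  * the one used in sgl_fmpyfadd.  Example: opnd1 negative, opnd2
  * positive means sign(a*b) would be negative, hence resultp1 is
  * seeded with +0 when the operand signs differ and with -0 when they
  * agree.
  */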
2046  /*
2047  * Generate multiply exponent
2048  */
2049  mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050 
2051  /*
2052  * check first operand for NaN's or infinity
2053  */
2054  if (Sgl_isinfinity_exponent(opnd1)) {
2055  if (Sgl_iszero_mantissa(opnd1)) {
2056  if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057  if (Sgl_iszero_exponentmantissa(opnd2)) {
2058  /*
2059  * invalid since operands are infinity
2060  * and zero
2061  */
2062  if (Is_invalidtrap_enabled())
2063  return(OPC_2E_INVALIDEXCEPTION);
2064  Set_invalidflag();
2065  Sgl_makequietnan(resultp1);
2066  Sgl_copytoptr(resultp1,dstptr);
2067  return(NOEXCEPTION);
2068  }
2069  /*
2070  * Check third operand for infinity with a
2071  * sign opposite of the multiply result
2072  */
2073  if (Sgl_isinfinity(opnd3) &&
2074  (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075  /*
2076  * invalid since attempting a magnitude
2077  * subtraction of infinities
2078  */
2079  if (Is_invalidtrap_enabled())
2080  return(OPC_2E_INVALIDEXCEPTION);
2081  Set_invalidflag();
2082  Sgl_makequietnan(resultp1);
2083  Sgl_copytoptr(resultp1,dstptr);
2084  return(NOEXCEPTION);
2085  }
2086 
2087  /*
2088  * return infinity
2089  */
2090  Sgl_setinfinity_exponentmantissa(resultp1);
2091  Sgl_copytoptr(resultp1,dstptr);
2092  return(NOEXCEPTION);
2093  }
2094  }
2095  else {
2096  /*
2097  * is NaN; signaling or quiet?
2098  */
2099  if (Sgl_isone_signaling(opnd1)) {
2100  /* trap if INVALIDTRAP enabled */
2101  if (Is_invalidtrap_enabled())
2102  return(OPC_2E_INVALIDEXCEPTION);
2103  /* make NaN quiet */
2104  Set_invalidflag();
2105  Sgl_set_quiet(opnd1);
2106  }
2107  /*
2108  * is second operand a signaling NaN?
2109  */
2110  else if (Sgl_is_signalingnan(opnd2)) {
2111  /* trap if INVALIDTRAP enabled */
2112  if (Is_invalidtrap_enabled())
2113  return(OPC_2E_INVALIDEXCEPTION);
2114  /* make NaN quiet */
2115  Set_invalidflag();
2116  Sgl_set_quiet(opnd2);
2117  Sgl_copytoptr(opnd2,dstptr);
2118  return(NOEXCEPTION);
2119  }
2120  /*
2121  * is third operand a signaling NaN?
2122  */
2123  else if (Sgl_is_signalingnan(opnd3)) {
2124  /* trap if INVALIDTRAP enabled */
2125  if (Is_invalidtrap_enabled())
2126  return(OPC_2E_INVALIDEXCEPTION);
2127  /* make NaN quiet */
2128  Set_invalidflag();
2129  Sgl_set_quiet(opnd3);
2130  Sgl_copytoptr(opnd3,dstptr);
2131  return(NOEXCEPTION);
2132  }
2133  /*
2134  * return quiet NaN
2135  */
2136  Sgl_copytoptr(opnd1,dstptr);
2137  return(NOEXCEPTION);
2138  }
2139  }
2140 
2141  /*
2142  * check second operand for NaN's or infinity
2143  */
2144  if (Sgl_isinfinity_exponent(opnd2)) {
2145  if (Sgl_iszero_mantissa(opnd2)) {
2146  if (Sgl_isnotnan(opnd3)) {
2147  if (Sgl_iszero_exponentmantissa(opnd1)) {
2148  /*
2149  * invalid since multiply operands are
2150  * zero & infinity
2151  */
2152  if (Is_invalidtrap_enabled())
2153  return(OPC_2E_INVALIDEXCEPTION);
2154  Set_invalidflag();
2155  Sgl_makequietnan(opnd2);
2156  Sgl_copytoptr(opnd2,dstptr);
2157  return(NOEXCEPTION);
2158  }
2159 
2160  /*
2161  * Check third operand for infinity with a
2162  * sign opposite of the multiply result
2163  */
2164  if (Sgl_isinfinity(opnd3) &&
2165  (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166  /*
2167  * invalid since attempting a magnitude
2168  * subtraction of infinities
2169  */
2170  if (Is_invalidtrap_enabled())
2171  return(OPC_2E_INVALIDEXCEPTION);
2172  Set_invalidflag();
2173  Sgl_makequietnan(resultp1);
2174  Sgl_copytoptr(resultp1,dstptr);
2175  return(NOEXCEPTION);
2176  }
2177 
2178  /*
2179  * return infinity
2180  */
2181  Sgl_setinfinity_exponentmantissa(resultp1);
2182  Sgl_copytoptr(resultp1,dstptr);
2183  return(NOEXCEPTION);
2184  }
2185  }
2186  else {
2187  /*
2188  * is NaN; signaling or quiet?
2189  */
2190  if (Sgl_isone_signaling(opnd2)) {
2191  /* trap if INVALIDTRAP enabled */
2192  if (Is_invalidtrap_enabled())
2193  return(OPC_2E_INVALIDEXCEPTION);
2194  /* make NaN quiet */
2195  Set_invalidflag();
2196  Sgl_set_quiet(opnd2);
2197  }
2198  /*
2199  * is third operand a signaling NaN?
2200  */
2201  else if (Sgl_is_signalingnan(opnd3)) {
2202  /* trap if INVALIDTRAP enabled */
2203  if (Is_invalidtrap_enabled())
2204  return(OPC_2E_INVALIDEXCEPTION);
2205  /* make NaN quiet */
2206  Set_invalidflag();
2207  Sgl_set_quiet(opnd3);
2208  Sgl_copytoptr(opnd3,dstptr);
2209  return(NOEXCEPTION);
2210  }
2211  /*
2212  * return quiet NaN
2213  */
2214  Sgl_copytoptr(opnd2,dstptr);
2215  return(NOEXCEPTION);
2216  }
2217  }
2218 
2219  /*
2220  * check third operand for NaN's or infinity
2221  */
2222  if (Sgl_isinfinity_exponent(opnd3)) {
2223  if (Sgl_iszero_mantissa(opnd3)) {
2224  /* return infinity */
2225  Sgl_copytoptr(opnd3,dstptr);
2226  return(NOEXCEPTION);
2227  } else {
2228  /*
2229  * is NaN; signaling or quiet?
2230  */
2231  if (Sgl_isone_signaling(opnd3)) {
2232  /* trap if INVALIDTRAP enabled */
2233  if (Is_invalidtrap_enabled())
2234  return(OPC_2E_INVALIDEXCEPTION);
2235  /* make NaN quiet */
2236  Set_invalidflag();
2237  Sgl_set_quiet(opnd3);
2238  }
2239  /*
2240  * return quiet NaN
2241  */
2242  Sgl_copytoptr(opnd3,dstptr);
2243  return(NOEXCEPTION);
2244  }
2245  }
2246 
2247  /*
2248  * Generate multiply mantissa
2249  */
2250  if (Sgl_isnotzero_exponent(opnd1)) {
2251  /* set hidden bit */
2252  Sgl_clear_signexponent_set_hidden(opnd1);
2253  }
2254  else {
2255  /* check for zero */
2256  if (Sgl_iszero_mantissa(opnd1)) {
2257  /*
2258  * Perform the add opnd3 with zero here.
2259  */
2260  if (Sgl_iszero_exponentmantissa(opnd3)) {
2261  if (Is_rounding_mode(ROUNDMINUS)) {
2262  Sgl_or_signs(opnd3,resultp1);
2263  } else {
2264  Sgl_and_signs(opnd3,resultp1);
2265  }
2266  }
2267  /*
2268  * Now let's check for trapped underflow case.
2269  */
2270  else if (Sgl_iszero_exponent(opnd3) &&
2271  Is_underflowtrap_enabled()) {
2272  /* need to normalize result's mantissa */
2273  sign_save = Sgl_signextendedsign(opnd3);
2274  result_exponent = 0;
2275  Sgl_leftshiftby1(opnd3);
2276  Sgl_normalize(opnd3,result_exponent);
2277  Sgl_set_sign(opnd3,/*using*/sign_save);
2278  Sgl_setwrapped_exponent(opnd3,result_exponent,
2279  unfl);
2280  Sgl_copytoptr(opnd3,dstptr);
2281  /* inexact = FALSE */
2282  return(OPC_2E_UNDERFLOWEXCEPTION);
2283  }
2284  Sgl_copytoptr(opnd3,dstptr);
2285  return(NOEXCEPTION);
2286  }
2287  /* is denormalized, adjust exponent */
2288  Sgl_clear_signexponent(opnd1);
2289  Sgl_leftshiftby1(opnd1);
2290  Sgl_normalize(opnd1,mpy_exponent);
2291  }
2292  /* opnd2 needs to have hidden bit set with msb in hidden bit */
2293  if (Sgl_isnotzero_exponent(opnd2)) {
2294  Sgl_clear_signexponent_set_hidden(opnd2);
2295  }
2296  else {
2297  /* check for zero */
2298  if (Sgl_iszero_mantissa(opnd2)) {
2299  /*
2300  * Perform the add opnd3 with zero here.
2301  */
2302  if (Sgl_iszero_exponentmantissa(opnd3)) {
2303  if (Is_rounding_mode(ROUNDMINUS)) {
2304  Sgl_or_signs(opnd3,resultp1);
2305  } else {
2306  Sgl_and_signs(opnd3,resultp1);
2307  }
2308  }
2309  /*
2310  * Now let's check for trapped underflow case.
2311  */
2312  else if (Sgl_iszero_exponent(opnd3) &&
2313  Is_underflowtrap_enabled()) {
2314  /* need to normalize result's mantissa */
2315  sign_save = Sgl_signextendedsign(opnd3);
2316  result_exponent = 0;
2317  Sgl_leftshiftby1(opnd3);
2318  Sgl_normalize(opnd3,result_exponent);
2319  Sgl_set_sign(opnd3,/*using*/sign_save);
2320  Sgl_setwrapped_exponent(opnd3,result_exponent,
2321  unfl);
2322  Sgl_copytoptr(opnd3,dstptr);
2323  /* inexact = FALSE */
2324  return(OPC_2E_UNDERFLOWEXCEPTION);
2325  }
2326  Sgl_copytoptr(opnd3,dstptr);
2327  return(NOEXCEPTION);
2328  }
2329  /* is denormalized; want to normalize */
2330  Sgl_clear_signexponent(opnd2);
2331  Sgl_leftshiftby1(opnd2);
2332  Sgl_normalize(opnd2,mpy_exponent);
2333  }
2334 
2335  /* Multiply the first two source mantissas together */
2336 
2337  /*
2338  * The intermediate result will be kept in tmpres,
2339  * which needs enough room for the 48-bit product of the
2340  * two 24-bit mantissas, so let's call it a Single extended.
2341  */
2342  Sglext_setzero(tmpresp1,tmpresp2);
2343 
2344  /*
2345  * Four bits at a time are inspected in each loop, and a
2346  * simple shift and add multiply algorithm is used.
2347  */
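 /*
  * Example of one loop pass (illustrative values): if the current low
  * nibble of opnd1 is binary 1011, the partial product is first
  * shifted right by 4, then opnd2<<3, opnd2<<1 and opnd2 are
  * accumulated with Twoword_add, i.e. the nibble contributes 11 *
  * opnd2 at the proper weight.  Sbit28..Sbit31 test the four nibble
  * bits from most to least significant before Sgl_rightshiftby4
  * exposes the next nibble.
  */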
2348  for (count = SGL_P-1; count >= 0; count -= 4) {
2349  Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350  if (Sbit28(opnd1)) {
2351  /* Twoword_add should be an ADD followed by 2 ADDC's */
2352  Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353  }
2354  if (Sbit29(opnd1)) {
2355  Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356  }
2357  if (Sbit30(opnd1)) {
2358  Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359  }
2360  if (Sbit31(opnd1)) {
2361  Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362  }
2363  Sgl_rightshiftby4(opnd1);
2364  }
2365  if (Is_sexthiddenoverflow(tmpresp1)) {
2366  /* result mantissa >= 2 (mantissa overflow) */
2367  mpy_exponent++;
2368  Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369  } else {
2370  Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371  }
2372 
2373  /*
2374  * Restore the sign of the mpy result which was saved in resultp1.
2375  * The exponent will continue to be kept in mpy_exponent.
2376  */
2377  Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378 
2379  /*
2380  * No rounding is required, since the result of the multiply
2381  * is exact in the extended format.
2382  */
2383 
2384  /*
2385  * Now we are ready to perform the add portion of the operation.
2386  *
2387  * The exponents need to be kept as integers for now, since the
2388  * multiply result might not fit into the exponent field. We
2389  * can't overflow or underflow because of this yet, since the
2390  * add could bring the final result back into range.
2391  */
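 /*
  * Numeric illustration (not from the source): with SGL_BIAS = 127 the
  * product exponent exp1 + exp2 - 127 can reach 381 for two maximal
  * finite inputs, or drop far below zero after normalizing
  * denormalized operands, values well outside the 8-bit exponent
  * field; that is why it is carried in a plain int until the add
  * settles the final range.
  */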
2392  add_exponent = Sgl_exponent(opnd3);
2393 
2394  /*
2395  * Check for denormalized or zero add operand.
2396  */
2397  if (add_exponent == 0) {
2398  /* check for zero */
2399  if (Sgl_iszero_mantissa(opnd3)) {
2400  /* right is zero */
2401  /* Left (the product) can't be zero, so it must be the result.
2402  *
2403  * The final result is now in tmpres and mpy_exponent,
2404  * and needs to be rounded and squeezed back into
2405  * single precision format from the single extended form.
2406  */
2407  result_exponent = mpy_exponent;
2408  Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409  sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410  goto round;
2411  }
2412 
2413  /*
2414  * Neither is zero.
2415  * Adjust exponent and normalize add operand.
2416  */
2417  sign_save = Sgl_signextendedsign(opnd3); /* save sign */
2418  Sgl_clear_signexponent(opnd3);
2419  Sgl_leftshiftby1(opnd3);
2420  Sgl_normalize(opnd3,add_exponent);
2421  Sgl_set_sign(opnd3,sign_save); /* restore sign */
2422  } else {
2423  Sgl_clear_exponent_set_hidden(opnd3);
2424  }
2425  /*
2426  * Copy opnd3 to the double extended variable called right.
2427  */
2428  Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429 
2430  /*
2431  * A zero "save" helps discover equal operands (for later),
2432  * and is used in swapping operands (if needed).
2433  */
2434  Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435 
2436  /*
2437  * Compare magnitude of operands.
2438  */
2439  Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440  Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441  if (mpy_exponent < add_exponent || (mpy_exponent == add_exponent &&
2442  Sglext_ismagnitudeless(signlessleft1,signlessright1))) {
2443  /*
2444  * Set the left operand to the larger one by XOR swap.
2445  * First finish the first word "save".
2446  */
2447  Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448  Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449  Sglext_swap_lower(tmpresp2,rightp2);
2450  /* also setup exponents used in rest of routine */
2451  diff_exponent = add_exponent - mpy_exponent;
2452  result_exponent = add_exponent;
2453  } else {
2454  /* also setup exponents used in rest of routine */
2455  diff_exponent = mpy_exponent - add_exponent;
2456  result_exponent = mpy_exponent;
2457  }
2458  /* Invariant: left is not smaller than right. */
2459 
2460  /*
2461  * Special case alignment of operands that would force alignment
2462  * beyond the extent of the extension. A further optimization
2463  * could special case this but only reduces the path length for
2464  * this infrequent case.
2465  */
2466  if (diff_exponent > SGLEXT_THRESHOLD) {
2467  diff_exponent = SGLEXT_THRESHOLD;
2468  }
2469 
2470  /* Align right operand by shifting it to the right */
2471  Sglext_clear_sign(rightp1);
2472  Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473 
2474  /* Treat sum and difference of the operands separately. */
2475  if ((int)save < 0) {
2476  /*
2477  * Difference of the two operands. Overflow can occur if the
2478  * multiply overflowed. A borrow can occur out of the hidden
2479  * bit and force a post normalization phase.
2480  */
2481  Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482  resultp1,resultp2);
2483  sign_save = Sgl_signextendedsign(resultp1);
2484  if (Sgl_iszero_hidden(resultp1)) {
2485  /* Handle normalization */
2486  /* A straightforward algorithm would now shift the
2487  * result and extension left until the hidden bit
2488  * becomes one. Not all of the extension bits need to
2489  * participate in the shift. Only the two most
2490  * significant bits (round and guard) are needed.
2491  * If only a single shift is needed then the guard
2492  * bit becomes a significant low order bit and the
2493  * extension must participate in the rounding.
2494  * If more than a single shift is needed, then all
2495  * bits to the right of the guard bit are zeros,
2496  * and the guard bit may or may not be zero. */
2497  Sglext_leftshiftby1(resultp1,resultp2);
2498 
2499  /* Need to check for a zero result. The sign and
2500  * exponent fields have already been zeroed. The more
2501  * efficient test of the full object can be used.
2502  */
2503  if (Sglext_iszero(resultp1,resultp2)) {
2504  /* Must have been "x-x" or "x+(-x)". */
2505  if (Is_rounding_mode(ROUNDMINUS))
2506  Sgl_setone_sign(resultp1);
2507  Sgl_copytoptr(resultp1,dstptr);
2508  return(NOEXCEPTION);
2509  }
2510  result_exponent--;
2511 
2512  /* Look to see if normalization is finished. */
2513  if (Sgl_isone_hidden(resultp1)) {
2514  /* No further normalization is needed */
2515  goto round;
2516  }
2517 
2518  /* Discover first one bit to determine shift amount.
2519  * Use a modified binary search. We have already
2520  * shifted the result one position right and still
2521  * not found a one so the remainder of the extension
2522  * must be zero and simplifies rounding. */
2523  /* Scan bytes */
2524  while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525  Sglext_leftshiftby8(resultp1,resultp2);
2526  result_exponent -= 8;
2527  }
2528  /* Now narrow it down to the nibble */
2529  if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530  /* The lower nibble contains the
2531  * normalizing one */
2532  Sglext_leftshiftby4(resultp1,resultp2);
2533  result_exponent -= 4;
2534  }
2535  /* Select the case where the first bit is set (already
2536  * normalized); otherwise select the proper shift. */
2537  jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538  if (jumpsize <= 7) switch(jumpsize) {
2539  case 1:
2540  Sglext_leftshiftby3(resultp1,resultp2);
2541  result_exponent -= 3;
2542  break;
2543  case 2:
2544  case 3:
2545  Sglext_leftshiftby2(resultp1,resultp2);
2546  result_exponent -= 2;
2547  break;
2548  case 4:
2549  case 5:
2550  case 6:
2551  case 7:
2552  Sglext_leftshiftby1(resultp1,resultp2);
2553  result_exponent -= 1;
2554  break;
2555  }
2556  } /* end if (hidden...)... */
2557  /* Fall through and round */
2558  } /* end if (save < 0)... */
2559  else {
2560  /* Add magnitudes */
2561  Sglext_addition(tmpresp1,tmpresp2,
2562  rightp1,rightp2, /*to*/resultp1,resultp2);
2563  sign_save = Sgl_signextendedsign(resultp1);
2564  if (Sgl_isone_hiddenoverflow(resultp1)) {
2565  /* Prenormalization required. */
2566  Sglext_arithrightshiftby1(resultp1,resultp2);
2567  result_exponent++;
2568  } /* end if hiddenoverflow... */
2569  } /* end else ...add magnitudes... */
2570 
2571  /* Round the result. If the extension word (resultp2) is
2572  * all zeros, then the result is exact. Otherwise round in the
2573  * correct direction. Underflow is possible. If a postnormalization
2574  * is necessary, then the mantissa is all zeros so no shift is needed.
2575  */
2576  round:
2577  if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578  Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579  }
2580  Sgl_set_sign(resultp1,/*using*/sign_save);
2581  if (Sglext_isnotzero_mantissap2(resultp2)) {
2582  inexact = TRUE;
2583  switch(Rounding_mode()) {
2584  case ROUNDNEAREST: /* The default. */
2585  if (Sglext_isone_highp2(resultp2)) {
2586  /* at least 1/2 ulp */
2587  if (Sglext_isnotzero_low31p2(resultp2) ||
2588  Sglext_isone_lowp1(resultp1)) {
2589  /* either exactly half way and odd or
2590  * more than 1/2ulp */
2591  Sgl_increment(resultp1);
2592  }
2593  }
2594  break;
2595 
2596  case ROUNDPLUS:
2597  if (Sgl_iszero_sign(resultp1)) {
2598  /* Round up positive results */
2599  Sgl_increment(resultp1);
2600  }
2601  break;
2602 
2603  case ROUNDMINUS:
2604  if (Sgl_isone_sign(resultp1)) {
2605  /* Round down negative results */
2606  Sgl_increment(resultp1);
2607  }
2608 
2609  case ROUNDZERO:;
2610  /* truncate is simple */
2611  } /* end switch... */
2612  if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613  }
2614  if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615  /* Overflow */
2616  if (Is_overflowtrap_enabled()) {
2617  /*
2618  * Adjust bias of result
2619  */
2620  Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621  Sgl_copytoptr(resultp1,dstptr);
2622  if (inexact)
2623  if (Is_inexacttrap_enabled())
2624  return (OPC_2E_OVERFLOWEXCEPTION |
2625  OPC_2E_INEXACTEXCEPTION);
2626  else Set_inexactflag();
2627  return (OPC_2E_OVERFLOWEXCEPTION);
2628  }
2629  inexact = TRUE;
2630  Set_overflowflag();
2631  Sgl_setoverflow(resultp1);
2632  } else if (result_exponent <= 0) { /* underflow case */
2633  if (Is_underflowtrap_enabled()) {
2634  /*
2635  * Adjust bias of result
2636  */
2637  Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638  Sgl_copytoptr(resultp1,dstptr);
2639  if (inexact)
2640  if (Is_inexacttrap_enabled())
2641  return (OPC_2E_UNDERFLOWEXCEPTION |
2642  OPC_2E_INEXACTEXCEPTION);
2643  else Set_inexactflag();
2644  return(OPC_2E_UNDERFLOWEXCEPTION);
2645  }
2646  else if (inexact && is_tiny) Set_underflowflag();
2647  }
2648  else Sgl_set_exponent(resultp1,result_exponent);
2649  Sgl_copytoptr(resultp1,dstptr);
2650  if (inexact)
2651  if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652  else Set_inexactflag();
2653  return(NOEXCEPTION);
2654 }
2655
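 /*
  * Usage sketch (hypothetical, not part of the original fmpyfadd.c):
  * a minimal illustration of how the external interface above might be
  * driven to compute -(a*b)+c.  It assumes, as the rest of this file
  * does, that sgl_floating_point objects wrap 32-bit IEEE single
  * images; the wrapper name and the local status word are invented for
  * the example and the block is disabled so it is never compiled.
  */
 #if 0
 static unsigned int example_fmpynfadd(sgl_floating_point *a,
                                       sgl_floating_point *b,
                                       sgl_floating_point *c,
                                       sgl_floating_point *result)
 {
         unsigned int status = 0;        /* FP status word for flags/traps */

         /* returns NOEXCEPTION or an OPC_2E_* exception/trap code */
         return sgl_fmpynfadd(a, b, c, &status, result);
 }
 #endif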