Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
op-2.h
Go to the documentation of this file.
1 /* Software floating-point emulation.
2  Basic two-word fraction declaration and manipulation.
3  Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4  This file is part of the GNU C Library.
5  Contributed by Richard Henderson ([email protected]),
6  Jakub Jelinek ([email protected]),
7  David S. Miller ([email protected]) and
8  Peter Maydell ([email protected]).
9 
10  The GNU C Library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU Library General Public License as
12  published by the Free Software Foundation; either version 2 of the
13  License, or (at your option) any later version.
14 
15  The GNU C Library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  Library General Public License for more details.
19 
20  You should have received a copy of the GNU Library General Public
21  License along with the GNU C Library; see the file COPYING.LIB. If
22  not, write to the Free Software Foundation, Inc.,
23  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 
25 #ifndef __MATH_EMU_OP_2_H__
26 #define __MATH_EMU_OP_2_H__
27 
28 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0 = 0, X##_f1 = 0
29 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
30 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
31 #define _FP_FRAC_HIGH_2(X) (X##_f1)
32 #define _FP_FRAC_LOW_2(X) (X##_f0)
33 #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
34 
35 #define _FP_FRAC_SLL_2(X,N) \
36  do { \
37  if ((N) < _FP_W_TYPE_SIZE) \
38  { \
39  if (__builtin_constant_p(N) && (N) == 1) \
40  { \
41  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
42  X##_f0 += X##_f0; \
43  } \
44  else \
45  { \
46  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
47  X##_f0 <<= (N); \
48  } \
49  } \
50  else \
51  { \
52  X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
53  X##_f0 = 0; \
54  } \
55  } while (0)
56 
57 #define _FP_FRAC_SRL_2(X,N) \
58  do { \
59  if ((N) < _FP_W_TYPE_SIZE) \
60  { \
61  X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
62  X##_f1 >>= (N); \
63  } \
64  else \
65  { \
66  X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
67  X##_f1 = 0; \
68  } \
69  } while (0)
70 
71 /* Right shift with sticky-lsb. */
72 #define _FP_FRAC_SRS_2(X,N,sz) \
73  do { \
74  if ((N) < _FP_W_TYPE_SIZE) \
75  { \
76  X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
77  (__builtin_constant_p(N) && (N) == 1 \
78  ? X##_f0 & 1 \
79  : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
80  X##_f1 >>= (N); \
81  } \
82  else \
83  { \
84  X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
85  (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \
86  X##_f1 = 0; \
87  } \
88  } while (0)
89 
90 #define _FP_FRAC_ADDI_2(X,I) \
91  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
92 
93 #define _FP_FRAC_ADD_2(R,X,Y) \
94  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
95 
96 #define _FP_FRAC_SUB_2(R,X,Y) \
97  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
98 
99 #define _FP_FRAC_DEC_2(X,Y) \
100  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
101 
102 #define _FP_FRAC_CLZ_2(R,X) \
103  do { \
104  if (X##_f1) \
105  __FP_CLZ(R,X##_f1); \
106  else \
107  { \
108  __FP_CLZ(R,X##_f0); \
109  R += _FP_W_TYPE_SIZE; \
110  } \
111  } while(0)
112 
113 /* Predicates */
114 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
115 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
116 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
117 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
118 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
119 #define _FP_FRAC_GT_2(X, Y) \
120  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
121 #define _FP_FRAC_GE_2(X, Y) \
122  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
123 
124 #define _FP_ZEROFRAC_2 0, 0
125 #define _FP_MINFRAC_2 0, 1
126 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
127 
128 /*
129  * Internals
130  */
131 
132 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
133 
134 #define __FP_CLZ_2(R, xh, xl) \
135  do { \
136  if (xh) \
137  __FP_CLZ(R,xh); \
138  else \
139  { \
140  __FP_CLZ(R,xl); \
141  R += _FP_W_TYPE_SIZE; \
142  } \
143  } while(0)
144 
145 #if 0
146 
147 #ifndef __FP_FRAC_ADDI_2
148 #define __FP_FRAC_ADDI_2(xh, xl, i) \
149  (xh += ((xl += i) < i))
150 #endif
151 #ifndef __FP_FRAC_ADD_2
152 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
153  (rh = xh + yh + ((rl = xl + yl) < xl))
154 #endif
155 #ifndef __FP_FRAC_SUB_2
156 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
157  (rh = xh - yh - ((rl = xl - yl) > xl))
158 #endif
159 #ifndef __FP_FRAC_DEC_2
160 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
161  do { \
162  UWtype _t = xl; \
163  xh -= yh + ((xl -= yl) > _t); \
164  } while (0)
165 #endif
166 
167 #else
168 
169 #undef __FP_FRAC_ADDI_2
170 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
171 #undef __FP_FRAC_ADD_2
172 #define __FP_FRAC_ADD_2 add_ssaaaa
173 #undef __FP_FRAC_SUB_2
174 #define __FP_FRAC_SUB_2 sub_ddmmss
175 #undef __FP_FRAC_DEC_2
176 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
177 
178 #endif
179 
180 /*
181  * Unpack the raw bits of a native fp value. Do not classify or
182  * normalize the data.
183  */
184 
185 #define _FP_UNPACK_RAW_2(fs, X, val) \
186  do { \
187  union _FP_UNION_##fs _flo; _flo.flt = (val); \
188  \
189  X##_f0 = _flo.bits.frac0; \
190  X##_f1 = _flo.bits.frac1; \
191  X##_e = _flo.bits.exp; \
192  X##_s = _flo.bits.sign; \
193  } while (0)
194 
195 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
196  do { \
197  union _FP_UNION_##fs *_flo = \
198  (union _FP_UNION_##fs *)(val); \
199  \
200  X##_f0 = _flo->bits.frac0; \
201  X##_f1 = _flo->bits.frac1; \
202  X##_e = _flo->bits.exp; \
203  X##_s = _flo->bits.sign; \
204  } while (0)
205 
206 
207 /*
208  * Repack the raw bits of a native fp value.
209  */
210 
211 #define _FP_PACK_RAW_2(fs, val, X) \
212  do { \
213  union _FP_UNION_##fs _flo; \
214  \
215  _flo.bits.frac0 = X##_f0; \
216  _flo.bits.frac1 = X##_f1; \
217  _flo.bits.exp = X##_e; \
218  _flo.bits.sign = X##_s; \
219  \
220  (val) = _flo.flt; \
221  } while (0)
222 
223 #define _FP_PACK_RAW_2_P(fs, val, X) \
224  do { \
225  union _FP_UNION_##fs *_flo = \
226  (union _FP_UNION_##fs *)(val); \
227  \
228  _flo->bits.frac0 = X##_f0; \
229  _flo->bits.frac1 = X##_f1; \
230  _flo->bits.exp = X##_e; \
231  _flo->bits.sign = X##_s; \
232  } while (0)
233 
234 
235 /*
236  * Multiplication algorithms:
237  */
238 
239 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
240 
241 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
242  do { \
243  _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
244  \
245  doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
246  doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
247  doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
248  doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
249  \
250  __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
251  _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
252  _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
253  _FP_FRAC_WORD_4(_z,1)); \
254  __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
255  _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
256  _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
257  _FP_FRAC_WORD_4(_z,1)); \
258  \
259  /* Normalize since we know where the msb of the multiplicands \
260  were (bit B), we know that the msb of the of the product is \
261  at either 2B or 2B-1. */ \
262  _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
263  R##_f0 = _FP_FRAC_WORD_4(_z,0); \
264  R##_f1 = _FP_FRAC_WORD_4(_z,1); \
265  } while (0)
266 
267 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
268  Do only 3 multiplications instead of four. This one is for machines
269  where multiplication is much more expensive than subtraction. */
270 
271 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
272  do { \
273  _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
274  _FP_W_TYPE _d; \
275  int _c1, _c2; \
276  \
277  _b_f0 = X##_f0 + X##_f1; \
278  _c1 = _b_f0 < X##_f0; \
279  _b_f1 = Y##_f0 + Y##_f1; \
280  _c2 = _b_f1 < Y##_f0; \
281  doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
282  doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
283  doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
284  \
285  _b_f0 &= -_c2; \
286  _b_f1 &= -_c1; \
287  __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
288  _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
289  0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
290  __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
291  _b_f0); \
292  __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
293  _b_f1); \
294  __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
295  _FP_FRAC_WORD_4(_z,1), \
296  0, _d, _FP_FRAC_WORD_4(_z,0)); \
297  __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
298  _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
299  __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
300  _c_f1, _c_f0, \
301  _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
302  \
303  /* Normalize since we know where the msb of the multiplicands \
304  were (bit B), we know that the msb of the of the product is \
305  at either 2B or 2B-1. */ \
306  _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
307  R##_f0 = _FP_FRAC_WORD_4(_z,0); \
308  R##_f1 = _FP_FRAC_WORD_4(_z,1); \
309  } while (0)
310 
311 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
312  do { \
313  _FP_FRAC_DECL_4(_z); \
314  _FP_W_TYPE _x[2], _y[2]; \
315  _x[0] = X##_f0; _x[1] = X##_f1; \
316  _y[0] = Y##_f0; _y[1] = Y##_f1; \
317  \
318  mpn_mul_n(_z_f, _x, _y, 2); \
319  \
320  /* Normalize since we know where the msb of the multiplicands \
321  were (bit B), we know that the msb of the of the product is \
322  at either 2B or 2B-1. */ \
323  _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
324  R##_f0 = _z_f[0]; \
325  R##_f1 = _z_f[1]; \
326  } while (0)
327 
328 /* Do at most 120x120=240 bits multiplication using double floating
329  point multiplication. This is useful if floating point
330  multiplication has much bigger throughput than integer multiply.
331  It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
332  between 106 and 120 only.
333  Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
334  SETFETZ is a macro which will disable all FPU exceptions and set rounding
335  towards zero, RESETFE should optionally reset it back. */
336 
337 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
338  do { \
339  static const double _const[] = { \
340  /* 2^-24 */ 5.9604644775390625e-08, \
341  /* 2^-48 */ 3.5527136788005009e-15, \
342  /* 2^-72 */ 2.1175823681357508e-22, \
343  /* 2^-96 */ 1.2621774483536189e-29, \
344  /* 2^28 */ 2.68435456e+08, \
345  /* 2^4 */ 1.600000e+01, \
346  /* 2^-20 */ 9.5367431640625e-07, \
347  /* 2^-44 */ 5.6843418860808015e-14, \
348  /* 2^-68 */ 3.3881317890172014e-21, \
349  /* 2^-92 */ 2.0194839173657902e-28, \
350  /* 2^-116 */ 1.2037062152420224e-35}; \
351  double _a240, _b240, _c240, _d240, _e240, _f240, \
352  _g240, _h240, _i240, _j240, _k240; \
353  union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
354  _p240, _q240, _r240, _s240; \
355  UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
356  \
357  if (wfracbits < 106 || wfracbits > 120) \
358  abort(); \
359  \
360  setfetz; \
361  \
362  _e240 = (double)(long)(X##_f0 & 0xffffff); \
363  _j240 = (double)(long)(Y##_f0 & 0xffffff); \
364  _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
365  _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
366  _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
367  _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
368  _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
369  _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
370  _a240 = (double)(long)(X##_f1 >> 32); \
371  _f240 = (double)(long)(Y##_f1 >> 32); \
372  _e240 *= _const[3]; \
373  _j240 *= _const[3]; \
374  _d240 *= _const[2]; \
375  _i240 *= _const[2]; \
376  _c240 *= _const[1]; \
377  _h240 *= _const[1]; \
378  _b240 *= _const[0]; \
379  _g240 *= _const[0]; \
380  _s240.d = _e240*_j240;\
381  _r240.d = _d240*_j240 + _e240*_i240;\
382  _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
383  _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
384  _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
385  _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
386  _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
387  _l240.d = _a240*_g240 + _b240*_f240; \
388  _k240 = _a240*_f240; \
389  _r240.d += _s240.d; \
390  _q240.d += _r240.d; \
391  _p240.d += _q240.d; \
392  _o240.d += _p240.d; \
393  _n240.d += _o240.d; \
394  _m240.d += _n240.d; \
395  _l240.d += _m240.d; \
396  _k240 += _l240.d; \
397  _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
398  _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
399  _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
400  _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
401  _o240.d += _const[7]; \
402  _n240.d += _const[6]; \
403  _m240.d += _const[5]; \
404  _l240.d += _const[4]; \
405  if (_s240.d != 0.0) _y240 = 1; \
406  if (_r240.d != 0.0) _y240 = 1; \
407  if (_q240.d != 0.0) _y240 = 1; \
408  if (_p240.d != 0.0) _y240 = 1; \
409  _t240 = (DItype)_k240; \
410  _u240 = _l240.i; \
411  _v240 = _m240.i; \
412  _w240 = _n240.i; \
413  _x240 = _o240.i; \
414  R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
415  | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
416  R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
417  | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
418  | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
419  | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
420  | _y240; \
421  resetfe; \
422  } while (0)
423 
424 /*
425  * Division algorithms:
426  */
427 
428 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
429  do { \
430  _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
431  if (_FP_FRAC_GT_2(X, Y)) \
432  { \
433  _n_f2 = X##_f1 >> 1; \
434  _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
435  _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
436  } \
437  else \
438  { \
439  R##_e--; \
440  _n_f2 = X##_f1; \
441  _n_f1 = X##_f0; \
442  _n_f0 = 0; \
443  } \
444  \
445  /* Normalize, i.e. make the most significant bit of the \
446  denominator set. */ \
447  _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
448  \
449  udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
450  umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
451  _r_f0 = _n_f0; \
452  if (_FP_FRAC_GT_2(_m, _r)) \
453  { \
454  R##_f1--; \
455  _FP_FRAC_ADD_2(_r, Y, _r); \
456  if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
457  { \
458  R##_f1--; \
459  _FP_FRAC_ADD_2(_r, Y, _r); \
460  } \
461  } \
462  _FP_FRAC_DEC_2(_r, _m); \
463  \
464  if (_r_f1 == Y##_f1) \
465  { \
466  /* This is a special case, not an optimization \
467  (_r/Y##_f1 would not fit into UWtype). \
468  As _r is guaranteed to be < Y, R##_f0 can be either \
469  (UWtype)-1 or (UWtype)-2. But as we know what kind \
470  of bits it is (sticky, guard, round), we don't care. \
471  We also don't care what the reminder is, because the \
472  guard bit will be set anyway. -jj */ \
473  R##_f0 = -1; \
474  } \
475  else \
476  { \
477  udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
478  umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
479  _r_f0 = 0; \
480  if (_FP_FRAC_GT_2(_m, _r)) \
481  { \
482  R##_f0--; \
483  _FP_FRAC_ADD_2(_r, Y, _r); \
484  if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
485  { \
486  R##_f0--; \
487  _FP_FRAC_ADD_2(_r, Y, _r); \
488  } \
489  } \
490  if (!_FP_FRAC_EQ_2(_r, _m)) \
491  R##_f0 |= _FP_WORK_STICKY; \
492  } \
493  } while (0)
494 
495 
496 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
497  do { \
498  _FP_W_TYPE _x[4], _y[2], _z[4]; \
499  _y[0] = Y##_f0; _y[1] = Y##_f1; \
500  _x[0] = _x[3] = 0; \
501  if (_FP_FRAC_GT_2(X, Y)) \
502  { \
503  R##_e++; \
504  _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
505  X##_f1 >> (_FP_W_TYPE_SIZE - \
506  (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
507  _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
508  } \
509  else \
510  { \
511  _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
512  X##_f1 >> (_FP_W_TYPE_SIZE - \
513  (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
514  _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
515  } \
516  \
517  (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
518  R##_f1 = _z[1]; \
519  R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
520  } while (0)
521 
522 
523 /*
524  * Square root algorithms:
525  * We have just one right now, maybe Newton approximation
526  * should be added for those machines where division is fast.
527  */
528 
529 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
530  do { \
531  while (q) \
532  { \
533  T##_f1 = S##_f1 + q; \
534  if (T##_f1 <= X##_f1) \
535  { \
536  S##_f1 = T##_f1 + q; \
537  X##_f1 -= T##_f1; \
538  R##_f1 += q; \
539  } \
540  _FP_FRAC_SLL_2(X, 1); \
541  q >>= 1; \
542  } \
543  q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
544  while (q != _FP_WORK_ROUND) \
545  { \
546  T##_f0 = S##_f0 + q; \
547  T##_f1 = S##_f1; \
548  if (T##_f1 < X##_f1 || \
549  (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
550  { \
551  S##_f0 = T##_f0 + q; \
552  S##_f1 += (T##_f0 > S##_f0); \
553  _FP_FRAC_DEC_2(X, T); \
554  R##_f0 += q; \
555  } \
556  _FP_FRAC_SLL_2(X, 1); \
557  q >>= 1; \
558  } \
559  if (X##_f0 | X##_f1) \
560  { \
561  if (S##_f1 < X##_f1 || \
562  (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
563  R##_f0 |= _FP_WORK_ROUND; \
564  R##_f0 |= _FP_WORK_STICKY; \
565  } \
566  } while (0)
567 
568 
569 /*
570  * Assembly/disassembly for converting to/from integral types.
571  * No shifting or overflow handled here.
572  */
574 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
575  do { \
576  if (rsize <= _FP_W_TYPE_SIZE) \
577  r = X##_f0; \
578  else \
579  { \
580  r = X##_f1; \
581  r <<= _FP_W_TYPE_SIZE; \
582  r += X##_f0; \
583  } \
584  } while (0)
585 
586 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
587  do { \
588  X##_f0 = r; \
589  X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
590  } while (0)
591 
592 /*
593  * Convert FP values between word sizes
594  */
595 
596 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \
597  do { \
598  if (S##_c != FP_CLS_NAN) \
599  _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
600  _FP_WFRACBITS_##sfs); \
601  else \
602  _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
603  D##_f = S##_f0; \
604  } while (0)
605 
606 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \
607  do { \
608  D##_f0 = S##_f; \
609  D##_f1 = 0; \
610  _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
611  } while (0)
612 
613 #endif