Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
poly_sin.c
Go to the documentation of this file.
1 /*---------------------------------------------------------------------------+
2  | poly_sin.c |
3  | |
4  | Computation of an approximation of the sin function and the cosine |
5  | function by a polynomial. |
6  | |
7  | Copyright (C) 1992,1993,1994,1997,1999 |
8  | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9  | E-mail [email protected] |
10  | |
11  | |
12  +---------------------------------------------------------------------------*/
13 
14 #include "exception.h"
15 #include "reg_constant.h"
16 #include "fpu_emu.h"
17 #include "fpu_system.h"
18 #include "control_w.h"
19 #include "poly.h"
20 
21 #define N_COEFF_P 4
22 #define N_COEFF_N 4
23 
24 static const unsigned long long pos_terms_l[N_COEFF_P] = {
25  0xaaaaaaaaaaaaaaabLL,
26  0x00d00d00d00cf906LL,
27  0x000006b99159a8bbLL,
28  0x000000000d7392e6LL
29 };
30 
31 static const unsigned long long neg_terms_l[N_COEFF_N] = {
32  0x2222222222222167LL,
33  0x0002e3bc74aab624LL,
34  0x0000000b09229062LL,
35  0x00000000000c7973LL
36 };
37 
38 #define N_COEFF_PH 4
39 #define N_COEFF_NH 4
40 static const unsigned long long pos_terms_h[N_COEFF_PH] = {
41  0x0000000000000000LL,
42  0x05b05b05b05b0406LL,
43  0x000049f93edd91a9LL,
44  0x00000000c9c9ed62LL
45 };
46 
47 static const unsigned long long neg_terms_h[N_COEFF_NH] = {
48  0xaaaaaaaaaaaaaa98LL,
49  0x001a01a01a019064LL,
50  0x0000008f76c68a77LL,
51  0x0000000000d58f5eLL
52 };
53 
54 /*--- poly_sine() -----------------------------------------------------------+
55  | |
56  +---------------------------------------------------------------------------*/
57 void poly_sine(FPU_REG *st0_ptr)
58 {
59  int exponent, echange;
60  Xsig accumulator, argSqrd, argTo4;
61  unsigned long fix_up, adj;
62  unsigned long long fixed_arg;
64 
65  exponent = exponent(st0_ptr);
66 
67  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
68 
69  /* Split into two ranges, for arguments below and above 1.0 */
70  /* The boundary between upper and lower is approx 0.88309101259 */
71  if ((exponent < -1)
72  || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
73  /* The argument is <= 0.88309101259 */
74 
75  argSqrd.msw = st0_ptr->sigh;
76  argSqrd.midw = st0_ptr->sigl;
77  argSqrd.lsw = 0;
78  mul64_Xsig(&argSqrd, &significand(st0_ptr));
79  shr_Xsig(&argSqrd, 2 * (-1 - exponent));
80  argTo4.msw = argSqrd.msw;
81  argTo4.midw = argSqrd.midw;
82  argTo4.lsw = argSqrd.lsw;
83  mul_Xsig_Xsig(&argTo4, &argTo4);
84 
85  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
86  N_COEFF_N - 1);
87  mul_Xsig_Xsig(&accumulator, &argSqrd);
88  negate_Xsig(&accumulator);
89 
90  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
91  N_COEFF_P - 1);
92 
93  shr_Xsig(&accumulator, 2); /* Divide by four */
94  accumulator.msw |= 0x80000000; /* Add 1.0 */
95 
96  mul64_Xsig(&accumulator, &significand(st0_ptr));
97  mul64_Xsig(&accumulator, &significand(st0_ptr));
98  mul64_Xsig(&accumulator, &significand(st0_ptr));
99 
100  /* Divide by four, FPU_REG compatible, etc */
101  exponent = 3 * exponent;
102 
103  /* The minimum exponent difference is 3 */
104  shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
105 
106  negate_Xsig(&accumulator);
107  XSIG_LL(accumulator) += significand(st0_ptr);
108 
109  echange = round_Xsig(&accumulator);
110 
111  setexponentpos(&result, exponent(st0_ptr) + echange);
112  } else {
113  /* The argument is > 0.88309101259 */
114  /* We use sin(st(0)) = cos(pi/2-st(0)) */
115 
116  fixed_arg = significand(st0_ptr);
117 
118  if (exponent == 0) {
119  /* The argument is >= 1.0 */
120 
121  /* Put the binary point at the left. */
122  fixed_arg <<= 1;
123  }
124  /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
125  fixed_arg = 0x921fb54442d18469LL - fixed_arg;
126  /* There is a special case which arises due to rounding, to fix here. */
127  if (fixed_arg == 0xffffffffffffffffLL)
128  fixed_arg = 0;
129 
130  XSIG_LL(argSqrd) = fixed_arg;
131  argSqrd.lsw = 0;
132  mul64_Xsig(&argSqrd, &fixed_arg);
133 
134  XSIG_LL(argTo4) = XSIG_LL(argSqrd);
135  argTo4.lsw = argSqrd.lsw;
136  mul_Xsig_Xsig(&argTo4, &argTo4);
137 
138  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
139  N_COEFF_NH - 1);
140  mul_Xsig_Xsig(&accumulator, &argSqrd);
141  negate_Xsig(&accumulator);
142 
143  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
144  N_COEFF_PH - 1);
145  negate_Xsig(&accumulator);
146 
147  mul64_Xsig(&accumulator, &fixed_arg);
148  mul64_Xsig(&accumulator, &fixed_arg);
149 
150  shr_Xsig(&accumulator, 3);
151  negate_Xsig(&accumulator);
152 
153  add_Xsig_Xsig(&accumulator, &argSqrd);
154 
155  shr_Xsig(&accumulator, 1);
156 
157  accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
158  negate_Xsig(&accumulator);
159 
160  /* The basic computation is complete. Now fix the answer to
161  compensate for the error due to the approximation used for
162  pi/2
163  */
164 
165  /* This has an exponent of -65 */
166  fix_up = 0x898cc517;
167  /* The fix-up needs to be improved for larger args */
168  if (argSqrd.msw & 0xffc00000) {
169  /* Get about 32 bit precision in these: */
170  fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
171  }
172  fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
173 
174  adj = accumulator.lsw; /* temp save */
175  accumulator.lsw -= fix_up;
176  if (accumulator.lsw > adj)
177  XSIG_LL(accumulator)--;
178 
179  echange = round_Xsig(&accumulator);
180 
181  setexponentpos(&result, echange - 1);
182  }
183 
184  significand(&result) = XSIG_LL(accumulator);
185  setsign(&result, getsign(st0_ptr));
186  FPU_copy_to_reg0(&result, TAG_Valid);
187 
188 #ifdef PARANOID
189  if ((exponent(&result) >= 0)
190  && (significand(&result) > 0x8000000000000000LL)) {
191  EXCEPTION(EX_INTERNAL | 0x150);
192  }
193 #endif /* PARANOID */
194 
195 }
196 
197 /*--- poly_cos() ------------------------------------------------------------+
198  | |
199  +---------------------------------------------------------------------------*/
200 void poly_cos(FPU_REG *st0_ptr)
201 {
202  FPU_REG result;
203  long int exponent, exp2, echange;
204  Xsig accumulator, argSqrd, fix_up, argTo4;
205  unsigned long long fixed_arg;
206 
207 #ifdef PARANOID
208  if ((exponent(st0_ptr) > 0)
209  || ((exponent(st0_ptr) == 0)
210  && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
213  return;
214  }
215 #endif /* PARANOID */
216 
217  exponent = exponent(st0_ptr);
218 
219  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
220 
221  if ((exponent < -1)
222  || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
223  /* arg is < 0.687705 */
224 
225  argSqrd.msw = st0_ptr->sigh;
226  argSqrd.midw = st0_ptr->sigl;
227  argSqrd.lsw = 0;
228  mul64_Xsig(&argSqrd, &significand(st0_ptr));
229 
230  if (exponent < -1) {
231  /* shift the argument right by the required places */
232  shr_Xsig(&argSqrd, 2 * (-1 - exponent));
233  }
234 
235  argTo4.msw = argSqrd.msw;
236  argTo4.midw = argSqrd.midw;
237  argTo4.lsw = argSqrd.lsw;
238  mul_Xsig_Xsig(&argTo4, &argTo4);
239 
240  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
241  N_COEFF_NH - 1);
242  mul_Xsig_Xsig(&accumulator, &argSqrd);
243  negate_Xsig(&accumulator);
244 
245  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
246  N_COEFF_PH - 1);
247  negate_Xsig(&accumulator);
248 
249  mul64_Xsig(&accumulator, &significand(st0_ptr));
250  mul64_Xsig(&accumulator, &significand(st0_ptr));
251  shr_Xsig(&accumulator, -2 * (1 + exponent));
252 
253  shr_Xsig(&accumulator, 3);
254  negate_Xsig(&accumulator);
255 
256  add_Xsig_Xsig(&accumulator, &argSqrd);
257 
258  shr_Xsig(&accumulator, 1);
259 
260  /* It doesn't matter if accumulator is all zero here, the
261  following code will work ok */
262  negate_Xsig(&accumulator);
263 
264  if (accumulator.lsw & 0x80000000)
265  XSIG_LL(accumulator)++;
266  if (accumulator.msw == 0) {
267  /* The result is 1.0 */
269  return;
270  } else {
271  significand(&result) = XSIG_LL(accumulator);
272 
273  /* will be a valid positive nr with expon = -1 */
274  setexponentpos(&result, -1);
275  }
276  } else {
277  fixed_arg = significand(st0_ptr);
278 
279  if (exponent == 0) {
280  /* The argument is >= 1.0 */
281 
282  /* Put the binary point at the left. */
283  fixed_arg <<= 1;
284  }
285  /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
286  fixed_arg = 0x921fb54442d18469LL - fixed_arg;
287  /* There is a special case which arises due to rounding, to fix here. */
288  if (fixed_arg == 0xffffffffffffffffLL)
289  fixed_arg = 0;
290 
291  exponent = -1;
292  exp2 = -1;
293 
294  /* A shift is needed here only for a narrow range of arguments,
295  i.e. for fixed_arg approx 2^-32, but we pick up more... */
296  if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
297  fixed_arg <<= 16;
298  exponent -= 16;
299  exp2 -= 16;
300  }
301 
302  XSIG_LL(argSqrd) = fixed_arg;
303  argSqrd.lsw = 0;
304  mul64_Xsig(&argSqrd, &fixed_arg);
305 
306  if (exponent < -1) {
307  /* shift the argument right by the required places */
308  shr_Xsig(&argSqrd, 2 * (-1 - exponent));
309  }
310 
311  argTo4.msw = argSqrd.msw;
312  argTo4.midw = argSqrd.midw;
313  argTo4.lsw = argSqrd.lsw;
314  mul_Xsig_Xsig(&argTo4, &argTo4);
315 
316  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
317  N_COEFF_N - 1);
318  mul_Xsig_Xsig(&accumulator, &argSqrd);
319  negate_Xsig(&accumulator);
320 
321  polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
322  N_COEFF_P - 1);
323 
324  shr_Xsig(&accumulator, 2); /* Divide by four */
325  accumulator.msw |= 0x80000000; /* Add 1.0 */
326 
327  mul64_Xsig(&accumulator, &fixed_arg);
328  mul64_Xsig(&accumulator, &fixed_arg);
329  mul64_Xsig(&accumulator, &fixed_arg);
330 
331  /* Divide by four, FPU_REG compatible, etc */
332  exponent = 3 * exponent;
333 
334  /* The minimum exponent difference is 3 */
335  shr_Xsig(&accumulator, exp2 - exponent);
336 
337  negate_Xsig(&accumulator);
338  XSIG_LL(accumulator) += fixed_arg;
339 
340  /* The basic computation is complete. Now fix the answer to
341  compensate for the error due to the approximation used for
342  pi/2
343  */
344 
345  /* This has an exponent of -65 */
346  XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
347  fix_up.lsw = 0;
348 
349  /* The fix-up needs to be improved for larger args */
350  if (argSqrd.msw & 0xffc00000) {
351  /* Get about 32 bit precision in these: */
352  fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
353  fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
354  }
355 
356  exp2 += norm_Xsig(&accumulator);
357  shr_Xsig(&accumulator, 1); /* Prevent overflow */
358  exp2++;
359  shr_Xsig(&fix_up, 65 + exp2);
360 
361  add_Xsig_Xsig(&accumulator, &fix_up);
362 
363  echange = round_Xsig(&accumulator);
364 
365  setexponentpos(&result, exp2 + echange);
366  significand(&result) = XSIG_LL(accumulator);
367  }
368 
369  FPU_copy_to_reg0(&result, TAG_Valid);
370 
371 #ifdef PARANOID
372  if ((exponent(&result) >= 0)
373  && (significand(&result) > 0x8000000000000000LL)) {
374  EXCEPTION(EX_INTERNAL | 0x151);
375  }
376 #endif /* PARANOID */
377 
378 }