Linux Kernel 3.7.1
multi_arith.h
/* multi_arith.h: multi-precision integer arithmetic functions, needed
   to do extended-precision floating point.

   (c) 1998 David Huggins-Daines.

   Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
   David Mosberger-Tang.

   You may copy, modify, and redistribute this file under the terms of
   the GNU General Public License, version 2, or any later version, at
   your convenience. */

/* Note:

   These are not general multi-precision math routines.  Rather, they
   implement the subset of integer arithmetic that we need in order to
   multiply, divide, and normalize 128-bit unsigned mantissae. */

#ifndef MULTI_ARITH_H
#define MULTI_ARITH_H

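/* For reference (paraphrased from fp_emu.h, not part of this file): these
   routines operate on the emulator's internal extended-precision format,
   whose types look roughly like this:

        union fp_mant64  { unsigned long long m64;    unsigned long m32[2]; };
        union fp_mant128 { unsigned long long m64[2]; unsigned long m32[4]; };

        struct fp_ext {
                unsigned char   lowmant;    -- 8 guard bits below the mantissa
                unsigned char   sign;
                unsigned short  exp;
                union fp_mant64 mant;       -- high bit set when normalized
        };
 */
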
/* Shift the mantissa right by cnt bits and add cnt to the exponent.  The
   eight bits immediately below the shifted mantissa are kept in lowmant;
   anything lost below that is folded into the sticky bit (bit 0 of lowmant). */
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
        reg->exp += cnt;

        switch (cnt) {
        case 0 ... 8:
                reg->lowmant = reg->mant.m32[1] << (8 - cnt);
                reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
                                   (reg->mant.m32[0] << (32 - cnt));
                reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
                break;
        case 9 ... 32:
                reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
                if (reg->mant.m32[1] << (40 - cnt))
                        reg->lowmant |= 1;
                reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
                                   (reg->mant.m32[0] << (32 - cnt));
                reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
                break;
        case 33 ... 39:
                asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
                        : "m" (reg->mant.m32[0]), "d" (64 - cnt));
                if (reg->mant.m32[1] << (40 - cnt))
                        reg->lowmant |= 1;
                reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
                reg->mant.m32[0] = 0;
                break;
        case 40 ... 71:
                reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
                if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
                        reg->lowmant |= 1;
                reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
                reg->mant.m32[0] = 0;
                break;
        default:
                reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
                reg->mant.m32[0] = 0;
                reg->mant.m32[1] = 0;
                break;
        }
}
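
/* Worked example (added for illustration): denormalizing
   mant = 0x80000000_00000f0f by cnt = 12 takes the 9..32 branch:
   lowmant = 0xf0f >> 4 = 0xf0 (the eight bits just below the new mantissa),
   0xf0f << 28 is nonzero, so the discarded low bits set the sticky bit
   (lowmant becomes 0xf1), and the mantissa ends up as 0x00080000_00000000
   with the exponent increased by 12. */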

/* Shift a nonzero mantissa left until its most significant set bit is back
   in bit 63; returns the shift count. */
static inline int fp_overnormalize(struct fp_ext *reg)
{
        int shift;

        if (reg->mant.m32[0]) {
                asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
                reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
                reg->mant.m32[1] = (reg->mant.m32[1] << shift);
        } else {
                asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
                reg->mant.m32[0] = (reg->mant.m32[1] << shift);
                reg->mant.m32[1] = 0;
                shift += 32;
        }

        return shift;
}
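
/* Note (added for clarity): the m68k bfffo instruction ("bit field find
   first one") returns the offset of the first set bit within the 32-bit
   field, i.e. the number of leading zeros of a nonzero word, so the value
   returned above is exactly how far the 64-bit mantissa had to be shifted
   left to bring its most significant set bit into bit 63. */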

/* Add the full mantissa of src (including lowmant) to dest; returns the
   carry out of the most significant bit. */
static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
        int carry;

        /* we assume here that gcc only inserts a move and a clr instruction */
        asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
                : "g,d" (src->lowmant), "0,0" (dest->lowmant));
        asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
                : "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
        asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
                : "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
        asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

        return carry;
}

/* Handle a carry out of fp_addmant(): bump the exponent and shift the
   mantissa right one bit, reinserting the carry as the new top bit.
   Returns 0 if the exponent overflows (overflow is flagged in the FPSR),
   1 otherwise. */
static inline int fp_addcarry(struct fp_ext *reg)
{
        if (++reg->exp == 0x7fff) {
                if (reg->mant.m64)
                        fp_set_sr(FPSR_EXC_INEX2);
                reg->mant.m64 = 0;
                fp_set_sr(FPSR_EXC_OVFL);
                return 0;
        }
        reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
        reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
                           (reg->mant.m32[0] << 31);
        reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

        return 1;
}
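
/* Usage sketch (added for illustration; the exact caller code lives in the
   arithmetic routines, not in this header):

        if (fp_addmant(dest, src))          -- carry out of bit 63?
                if (!fp_addcarry(dest))     -- shift right, bump exponent
                        return dest;        -- exponent overflowed

   i.e. fp_addcarry() renormalizes after a carry and reports, through its
   return value, whether the result still fits in the exponent range. */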

/* dest = src1 - src2 over the full mantissa, including lowmant
   (multi-precision subtract with borrow). */
static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
                              struct fp_ext *src2)
{
        /* we assume here that gcc only inserts a move and a clr instruction */
        asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
                : "g,d" (src2->lowmant), "0,0" (src1->lowmant));
        asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
                : "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
        asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
                : "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}

/* 32/64-bit building blocks, mapped directly onto the m68k mulu.l and divu.l
   instructions and the add/addx, sub/subx and scs carry-chain instructions.
   Note that fp_addx96() adds the caller's local 'temp' variable, not its
   src argument. */
#define fp_mul64(desth, destl, src1, src2) ({ \
        asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth) \
                : "dm" (src1), "0" (src2)); \
})
#define fp_div64(quot, rem, srch, srcl, div) \
        asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem) \
                : "dm" (div), "1" (srch), "0" (srcl))
#define fp_add64(dest1, dest2, src1, src2) ({ \
        asm ("add.l %1,%0" : "=d,dm" (dest2) \
                : "dm,d" (src2), "0,0" (dest2)); \
        asm ("addx.l %1,%0" : "=d" (dest1) \
                : "d" (src1), "0" (dest1)); \
})
#define fp_addx96(dest, src) ({ \
        /* we assume here that gcc only inserts a move and a clr instruction */ \
        asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2]) \
                : "g,d" (temp.m32[1]), "0,0" (dest->m32[2])); \
        asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1]) \
                : "d" (temp.m32[0]), "0" (dest->m32[1])); \
        asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0]) \
                : "d" (0), "0" (dest->m32[0])); \
})
#define fp_sub64(dest, src) ({ \
        asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1]) \
                : "dm,d" (src.m32[1]), "0,0" (dest.m32[1])); \
        asm ("subx.l %1,%0" : "=d" (dest.m32[0]) \
                : "d" (src.m32[0]), "0" (dest.m32[0])); \
})
#define fp_sub96c(dest, srch, srcm, srcl) ({ \
        char carry; \
        asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2]) \
                : "dm,d" (srcl), "0,0" (dest.m32[2])); \
        asm ("subx.l %1,%0" : "=d" (dest.m32[1]) \
                : "d" (srcm), "0" (dest.m32[1])); \
        asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0]) \
                : "d" (srch), "1" (dest.m32[0])); \
        carry; \
})
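
/* Worked example (added for illustration): fp_mul64() maps to mulu.l, a full
   32x32->64 multiply, and fp_div64() to divu.l, a 64/32 divide that yields a
   32-bit quotient and remainder (provided the quotient fits in 32 bits).
   For instance, fp_mul64(hi, lo, 0x80000000, 4) leaves hi = 0x00000002 and
   lo = 0x00000000, while fp_div64(q, r, 0x00000001, 0x00000000, 3) divides
   0x1_00000000 by 3, giving q = 0x55555555 and r = 1. */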

/* Multiply the two 64-bit mantissae into a 128-bit result using four
   32x32->64 partial products. */
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
                                   struct fp_ext *src2)
{
        union fp_mant64 temp;

        fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
        fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

        fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
        fp_addx96(dest, temp);

        fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
        fp_addx96(dest, temp);
}
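
/* Added note: this is the usual schoolbook decomposition.  Writing the two
   mantissae as A*2^32 + B and C*2^32 + D (A, B, C, D being 32-bit words),

        (A*2^32 + B) * (C*2^32 + D) = A*C*2^64 + (A*D + B*C)*2^32 + B*D

   so A*C fills the top two words of the result, B*D the bottom two, and the
   two cross products are added, with carry, into the middle by fp_addx96(). */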

/* Divide the mantissa of src by that of div (both with the high bit set),
   developing the quotient 32 bits at a time into dest->m32[]. */
static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
                                 struct fp_ext *div)
{
        union fp_mant128 tmp;
        union fp_mant64 tmp64;
        unsigned long *mantp = dest->m32;
        unsigned long fix, rem, first, dummy;
        int i;

        /* the algorithm below requires the dividend (src) to be smaller than
           div, but both have the high bit set */
        if (src->mant.m64 >= div->mant.m64) {
                fp_sub64(src->mant, div->mant);
                *mantp = 1;
        } else
                *mantp = 0;
        mantp++;

        /* Basic idea behind this algorithm: we can't divide two 64-bit
           numbers (AB/CD) directly, but we can calculate AB/C0.  That
           quotient is off by a factor of C0/CD, so we multiply the first
           result by this factor to fix it; after that the result is nearly
           correct and only a few corrections are needed (see the notation
           note below). */

        /* C0/CD could be precalculated, but that is again a 64-bit division.
           We can make it a bit easier by first dividing through by C, so we
           get 10/1D; then a single shift makes the value fit into 32 bits. */
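
        /* Notation (added for clarity): A and B are the high and low 32-bit
           words of the dividend mantissa, C and D those of the divisor, so
           C0 is the divisor with its low word cleared and AB/C0 is a division
           fp_div64() can do directly.  Since AB/CD = (AB/C0) * (C0/CD), each
           partial quotient computed below is scaled by "fix", an approximation
           of C0/CD in 0.32 fixed point, and the trailing while loop applies
           the remaining small corrections. */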
        fix = 0x80000000;
        dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
        dummy = (dummy >> 1) | fix;
        fp_div64(fix, dummy, fix, 0, dummy);
        fix--;

        for (i = 0; i < 3; i++, mantp++) {
                if (src->mant.m32[0] == div->mant.m32[0]) {
                        fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

                        fp_mul64(*mantp, dummy, first, fix);
                        *mantp += fix;
                } else {
                        fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);

                        fp_mul64(*mantp, dummy, first, fix);
                }

                fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
                fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
                tmp.m32[2] = 0;

                fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
                fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

                src->mant.m32[0] = tmp.m32[1];
                src->mant.m32[1] = tmp.m32[2];

                while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
                        src->mant.m32[0] = tmp.m32[1];
                        src->mant.m32[1] = tmp.m32[2];
                        *mantp += 1;
                }
        }
}

/* Store a 128-bit intermediate mantissa, left-shifted by 'shift' bits, into
   dest: the top 64 bits go into the mantissa, the next eight bits into
   lowmant, and any remaining low bits are folded into the sticky bit. */
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
                                 int shift)
{
        unsigned long tmp;

        switch (shift) {
        case 0:
                dest->mant.m64 = src->m64[0];
                dest->lowmant = src->m32[2] >> 24;
                if (src->m32[3] || (src->m32[2] << 8))
                        dest->lowmant |= 1;
                break;
        case 1:
                asm volatile ("lsl.l #1,%0"
                        : "=d" (tmp) : "0" (src->m32[2]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
                dest->lowmant = tmp >> 24;
                if (src->m32[3] || (tmp << 8))
                        dest->lowmant |= 1;
                break;
        case 31:
                asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
                        : "=d" (dest->mant.m32[0])
                        : "d" (src->m32[0]), "0" (src->m32[1]));
                asm volatile ("roxr.l #1,%0"
                        : "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
                asm volatile ("roxr.l #1,%0"
                        : "=d" (tmp) : "0" (src->m32[3]));
                dest->lowmant = tmp >> 24;
                if (src->m32[3] << 7)
                        dest->lowmant |= 1;
                break;
        case 32:
                dest->mant.m32[0] = src->m32[1];
                dest->mant.m32[1] = src->m32[2];
                dest->lowmant = src->m32[3] >> 24;
                if (src->m32[3] << 8)
                        dest->lowmant |= 1;
                break;
        }
}
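
/* Added note: only the shift amounts the emulator actually produces are
   handled.  For multiplication this follows from the arithmetic: the product
   of two normalized mantissae in [2^63, 2^64) lies in [2^126, 2^128), so at
   most one bit of left shift (case 0 or 1) renormalizes it; the 31/32 cases
   presumably cover the quotient alignment on the divide path. */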

#endif /* MULTI_ARITH_H */