cryptlib  3.4.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Macros
bn_div.c
Go to the documentation of this file.
1 /* crypto/bn/bn_div.c */
2 /* Copyright (C) 1995-1998 Eric Young ([email protected])
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young ([email protected]).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to. The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson ([email protected]).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  * notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  * notice, this list of conditions and the following disclaimer in the
30  * documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  * must display the following acknowledgement:
33  * "This product includes cryptographic software written by
34  * Eric Young ([email protected])"
35  * The word 'cryptographic' can be left out if the rouines from the library
36  * being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  * the apps directory (application code) you must include an acknowledgement:
39  * "This product includes software written by Tim Hudson ([email protected])"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed. i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 
59 #include <stdio.h>
60 #if defined( INC_ALL )
61  #include "bn_lcl.h"
62 #else
63  #include "bn/bn_lcl.h"
64 #endif /* Compiler-specific includes */
65 
66 /* Apple ship their own private fork of gcc rather than any standard version,
67  and going by its behaviour across several releases they must be basing it
68  on alpha versions that they dig up somewhere. One of the things that
69  version 4.0.1 on Intel 32-bit shows up is an optimiser bug with -O3 where
70  the output generated for the code just before the bn_div loop bears no
71  relation whatsoever to the source code. Dropping in a printf() at -03
72  resolves this, with the output being identical to what it would be with
73  -O2. There's at least one silver lining with Apple's buggy custom version
74  of gcc, it gives you the ability to selectively turn off optimisation on
75  a per-function level (which standard gcc doesn't), see
76  http://developer.apple.com/documentation/DeveloperTools/gcc-4.0.1/gcc/Darwin-Pragmas.html
77  So we use the following pragma to reduce the optimisation level from -O3
78  (produces garbled output) to -O2 (produces output identical to -O3 but
79  without the garbling) for Apple's buggy gcc on 32-bit x86 */
80 
81 #if defined( __APPLE__ ) && defined( __GNUC__ ) && \
82  ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ == 0 ) && defined( __i386__ )
83  #pragma optimization_level 2
84 #endif /* Apple gcc 4.0.x with buggy optimisation - pcg */
85 
86 /* The old slow way */
87 #if 0
88 int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
89  BN_CTX *ctx)
90  {
91  int i,nm,nd;
92  int ret = 0;
93  BIGNUM *D;
94 
95  bn_check_top(m);
96  bn_check_top(d);
97  if (BN_is_zero(d))
98  {
100  return(0);
101  }
102 
103  if (BN_ucmp(m,d) < 0)
104  {
105  if (rem != NULL)
106  { if (BN_copy(rem,m) == NULL) return(0); }
107  if (dv != NULL) BN_zero(dv);
108  return(1);
109  }
110 
111  BN_CTX_start(ctx);
112  D = BN_CTX_get(ctx);
113  if (dv == NULL) dv = BN_CTX_get(ctx);
114  if (rem == NULL) rem = BN_CTX_get(ctx);
115  if (D == NULL || dv == NULL || rem == NULL)
116  goto end;
117 
118  nd=BN_num_bits(d);
119  nm=BN_num_bits(m);
120  if (BN_copy(D,d) == NULL) goto end;
121  if (BN_copy(rem,m) == NULL) goto end;
122 
123  /* The next 2 are needed so we can do a dv->d[0]|=1 later
124  * since BN_lshift1 will only work once there is a value :-) */
125  BN_zero(dv);
126  bn_wexpand(dv,1);
127  dv->top=1;
128 
129  if (!BN_lshift(D,D,nm-nd)) goto end;
130  for (i=nm-nd; i>=0; i--)
131  {
132  if (!BN_lshift1(dv,dv)) goto end;
133  if (BN_ucmp(rem,D) >= 0)
134  {
135  dv->d[0]|=1;
136  if (!BN_usub(rem,rem,D)) goto end;
137  }
138 /* CAN IMPROVE (and have now :=) */
139  if (!BN_rshift1(D,D)) goto end;
140  }
141  rem->neg=BN_is_zero(rem)?0:m->neg;
142  dv->neg=m->neg^d->neg;
143  ret = 1;
144  end:
145  BN_CTX_end(ctx);
146  return(ret);
147  }
148 
149 #else
150 
151 #if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
152  && !defined(PEDANTIC) && !defined(BN_DIV3W)
153 # if defined(__GNUC__) && __GNUC__>=2
154 # if defined(__i386) || defined (__i386__)
155  /*
156  * There were two reasons for implementing this template:
157  * - GNU C generates a call to a function (__udivdi3 to be exact)
158  * in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
159  * understand why...);
160  * - divl doesn't only calculate quotient, but also leaves
161  * remainder in %edx which we can definitely use here:-)
162  *
164  */
165 # define bn_div_words(n0,n1,d0) \
166  ({ asm volatile ( \
167  "divl %4" \
168  : "=a"(q), "=d"(rem) \
169  : "a"(n1), "d"(n0), "g"(d0) \
170  : "cc"); \
171  q; \
172  })
173 # define REMAINDER_IS_ALREADY_CALCULATED
174 # elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
175  /*
176  * Same story here, but it's 128-bit by 64-bit division. Wow!
178  */
179 # define bn_div_words(n0,n1,d0) \
180  ({ asm volatile ( \
181  "divq %4" \
182  : "=a"(q), "=d"(rem) \
183  : "a"(n1), "d"(n0), "g"(d0) \
184  : "cc"); \
185  q; \
186  })
187 # define REMAINDER_IS_ALREADY_CALCULATED
188 # endif /* __<cpu> */
189 # endif /* __GNUC__ */
190 #endif /* OPENSSL_NO_ASM */
191 
192 
193 /* BN_div computes dv := num / divisor, rounding towards zero, and sets up
194  * rm such that dv*divisor + rm = num holds.
195  * Thus:
196  * dv->neg == num->neg ^ divisor->neg (unless the result is zero)
197  * rm->neg == num->neg (unless the remainder is zero)
198  * If 'dv' or 'rm' is NULL, the respective value is not returned.
199  */
200 int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
201  BN_CTX *ctx)
202  {
203  int norm_shift,i,loop;
204  BIGNUM *tmp,wnum,*snum,*sdiv,*res;
205  BN_ULONG *resp,*wnump;
206  BN_ULONG d0,d1;
207  int num_n,div_n;
208 
209  if (dv)
210  bn_check_top(dv);
211  if (rm)
212  bn_check_top(rm);
213  bn_check_top(num);
214  bn_check_top(divisor);
215 
216  if (BN_is_zero(divisor))
217  {
219  return(0);
220  }
221 
222  if (BN_ucmp(num,divisor) < 0)
223  {
224  if (rm != NULL)
225  { if (BN_copy(rm,num) == NULL) return(0); }
226  if (dv != NULL) BN_zero(dv);
227  return(1);
228  }
229 
230  BN_CTX_start(ctx);
231  tmp=BN_CTX_get(ctx);
232  snum=BN_CTX_get(ctx);
233  sdiv=BN_CTX_get(ctx);
234  if (dv == NULL)
235  res=BN_CTX_get(ctx);
236  else res=dv;
237 #if 0 /* pcg */
238  if (sdiv == NULL || res == NULL) goto err;
239 #else
240  if (sdiv == NULL || res == NULL || \
241  tmp == NULL || snum == NULL) goto err; /* pcg */
242 #endif /* 0 */
243 
244  /* First we normalise the numbers */
245  norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
246  if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
247  sdiv->neg=0;
248  norm_shift+=BN_BITS2;
249  if (!(BN_lshift(snum,num,norm_shift))) goto err;
250  snum->neg=0;
251  div_n=sdiv->top;
252  num_n=snum->top;
253  loop=num_n-div_n;
254  /* Lets setup a 'window' into snum
255  * This is the part that corresponds to the current
256  * 'area' being divided */
257  wnum.neg = 0;
258  wnum.d = &(snum->d[loop]);
259  wnum.top = div_n;
260  /* only needed when BN_ucmp messes up the values between top and max */
261  wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
262 
263  /* Get the top 2 words of sdiv */
264  /* div_n=sdiv->top; */
265  d0=sdiv->d[div_n-1];
266  d1=(div_n == 1)?0:sdiv->d[div_n-2];
267 
268  /* pointer to the 'top' of snum */
269  wnump= &(snum->d[num_n-1]);
270 
271  /* Setup to 'res' */
272  res->neg= (num->neg^divisor->neg);
273  if (!bn_wexpand(res,(loop+1))) goto err;
274  res->top=loop;
275  resp= &(res->d[loop-1]);
276 
277  /* space for temp */
278  if (!bn_wexpand(tmp,(div_n+1))) goto err;
279 
280  if (BN_ucmp(&wnum,sdiv) >= 0)
281  {
282  /* If BN_DEBUG_RAND is defined BN_ucmp changes (via
283  * bn_pollute) the const bignum arguments =>
284  * clean the values between top and max again */
285  bn_clear_top2max(&wnum);
286  bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
287  *resp=1;
288  }
289  else
290  res->top--;
291  /* if res->top == 0 then clear the neg value otherwise decrease
292  * the resp pointer */
293  if (res->top == 0)
294  res->neg = 0;
295  else
296  resp--;
297 
298  for (i=0; i<loop-1; i++, wnump--, resp--)
299  {
300  BN_ULONG q,l0;
301  /* the first part of the loop uses the top two words of
302  * snum and sdiv to calculate a BN_ULONG q such that
303  * | wnum - sdiv * q | < sdiv */
304 #if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
305  BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
306  q=bn_div_3_words(wnump,d1,d0);
307 #else
308  BN_ULONG n0,n1,rem=0;
309 
310  n0=wnump[0];
311  n1=wnump[-1];
312  if (n0 == d0)
313  q=BN_MASK2;
314  else /* n0 < d0 */
315  {
316 #ifdef BN_LLONG
317  BN_ULLONG t2;
318 
319 #if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
320  q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
321 #else
322  q=bn_div_words(n0,n1,d0);
323 #ifdef BN_DEBUG_LEVITTE
324  fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
325 X) -> 0x%08X\n",
326  n0, n1, d0, q);
327 #endif
328 #endif
329 
330 #ifndef REMAINDER_IS_ALREADY_CALCULATED
331  /*
332  * rem doesn't have to be BN_ULLONG. The least we
333  * know it's less that d0, isn't it?
334  */
335  rem=(n1-q*d0)&BN_MASK2;
336 #endif
337  t2=(BN_ULLONG)d1*q;
338 
339  for (;;)
340  {
341  if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
342  break;
343  q--;
344  rem += d0;
345  if (rem < d0) break; /* don't let rem overflow */
346  t2 -= d1;
347  }
348 #else /* !BN_LLONG */
349  BN_ULONG t2l,t2h;
350 #if !defined( BN_UMULT_LOHI ) && !defined( BN_UMULT_HIGH ) /* pcg */
351  BN_ULONG ql,qh;
352 #endif
353 
354  q=bn_div_words(n0,n1,d0);
355 #ifdef BN_DEBUG_LEVITTE
356  fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
357 X) -> 0x%08X\n",
358  n0, n1, d0, q);
359 #endif
360 #ifndef REMAINDER_IS_ALREADY_CALCULATED
361  rem=(n1-q*d0)&BN_MASK2;
362 #endif
363 
364 #if defined(BN_UMULT_LOHI)
365  BN_UMULT_LOHI(t2l,t2h,d1,q);
366 #elif defined(BN_UMULT_HIGH)
367  t2l = d1 * q;
368  t2h = BN_UMULT_HIGH(d1,q);
369 #else
370  t2l=LBITS(d1); t2h=HBITS(d1);
371  ql =LBITS(q); qh =HBITS(q);
372  mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
373 #endif
374 
375  for (;;)
376  {
377  if ((t2h < rem) ||
378  ((t2h == rem) && (t2l <= wnump[-2])))
379  break;
380  q--;
381  rem += d0;
382  if (rem < d0) break; /* don't let rem overflow */
383  if (t2l < d1) t2h--; t2l -= d1;
384  }
385 #endif /* !BN_LLONG */
386  }
387 #endif /* !BN_DIV3W */
388 
389  l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
390  tmp->d[div_n]=l0;
391  wnum.d--;
392  /* ingore top values of the bignums just sub the two
393  * BN_ULONG arrays with bn_sub_words */
394  if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
395  {
396  /* Note: As we have considered only the leading
397  * two BN_ULONGs in the calculation of q, sdiv * q
398  * might be greater than wnum (but then (q-1) * sdiv
399  * is less or equal than wnum)
400  */
401  q--;
402  if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
403  /* we can't have an overflow here (assuming
404  * that q != 0, but if q == 0 then tmp is
405  * zero anyway) */
406  (*wnump)++;
407  }
408  /* store part of the result */
409  *resp = q;
410  }
411  bn_correct_top(snum);
412  if (rm != NULL)
413  {
414  /* Keep a copy of the neg flag in num because if rm==num
415  * BN_rshift() will overwrite it.
416  */
417  int neg = num->neg;
418  BN_rshift(rm,snum,norm_shift);
419  if (!BN_is_zero(rm))
420  rm->neg = neg;
421  bn_check_top(rm);
422  }
423  BN_CTX_end(ctx);
424  return(1);
425 err:
426  if (rm)
427  bn_check_top(rm);
428  BN_CTX_end(ctx);
429  return(0);
430  }
431 
432 #endif