月光软件站 - 编程文档 - 其他语言 - 30位有效数字的浮点数结构解决double数据类型多次累加后的明显的误差

其他语言

本类阅读TOP10

·基于Solaris 开发环境的整体构思
·使用AutoMake轻松生成Makefile
·BCB数据库图像保存技术
·GNU中的Makefile
·射频芯片nRF401天线设计的分析
·iframe 的自适应高度
·BCB之Socket通信
·软件企业如何实施CMM
·入门系列--OpenGL最简单的入门
·WIN95中日志钩子（JournalRecord Hook）的使用

→ 分类导航

VC语言	Delphi
VB语言	ASP
Perl	Java
Script	数据库
其他语言	游戏开发
文件格式	网站制作
软件工程	.NET开发

30位有效数字的浮点数结构解决double数据类型多次累加后的明显的误差

作者：未知来源：月光软件站 加入时间：2005-2-28　月光软件站

30位有效数字的浮点数结构解决double数据类型多次累加后的明显的误差
标准的c或者c++的double数据类型只有15位有效数字（好像有这么回事，参看IEEE 754 ），
因此产生了大的数字多次累加后的明显的误差，在财务计算中，这种误差是不能接受的。
参看http://blog.csdn.net/l1t/archive/2004/10/09/129110.aspx
来自Lcc-win32 的src\doubledouble\doubledouble.c
（作者主页：http://members.lycos.co.uk/keithmbriggs/doubledouble.html
上还有一个c++封装的类，有兴趣的人可以去看。）
利用2个double变量构造出一个doubledouble结构，解决了这个问题。
测试办法：把main()函数的内容替换成下面内容。
int main(void)
{
doubledouble a = new_doubledouble(5.0,0.0);
doubledouble b = new_doubledouble(4.0,0.0);
doubledouble c = a*b;
double x1,x2;
char buffer[256];
int i;
a=a-a+99999999.99;
c=c-c+0.0;
for(i=0;i<81920;i++)
{
c=c+a;
}
doubledoubletoa(c,buffer);
printf("sum() =%s\n",buffer);
c=a*81920.0;
doubledoubletoa(c,buffer);
printf("mult()=%s\n",buffer);
x1=c.hi+c.lo;
printf("mult()=%18.2lf\n",x1);
return 0;
}
运行结果：
sum() =8.191999999180799560546875000000000e12
mult()=8.191999999180799560546875000000000e12
mult()= 8191999999180.80
在Lcc-win32 中，可以不加修改地通过编译。
如果在gcc下编译，需要定义宏#define GCC
在gcc和vs.net 2003 中，函数
doubledouble ddexp(const doubledouble& x)
{
...
sum1=y*((((ysq+3960.)*ysq+2162160.)*ysq+302702400.)*ysq+8821612800.);
sum2=(((90.*ysq+110880.)*ysq+30270240.)*ysq+2075673600.)*ysq+17643225600.;

...
}
上述2行不能通过编译，如果不需要这个函数，可以删除。

/* doubledouble.c */
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/*
This software was written by the mathematician Keith Martin Briggs. It implements
an extended precision data type "doubledouble", that is made up of two floating
point numbers. I have rewritten it for lcc-win32 and extensiveley used this code
to test the compiler's operator subclassing module.

The code should compile without problems in any C++ compiler.
---------------------------------------------------------------jacob
Copyright (C) 1997 Keith Martin Briggs

Except where otherwise indicated,
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with This program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

// 97 Aug 04 KMB added ldexp
// 97 Jul 11 Moved This stuff out of quad.h, created inline.h so it can
// be #included even if we're not inlining, by just "#define inline"
// 97 Jul 12 Added all combos of doubledouble/double/int +-*/. Only a couple actually
// written; most just call the others by swapping arguments. Generates
// equivalent code with a good inlining compiler (tested with g++ 2.7.2).
// - e.g., all subtraction is now done by addition of unary minus.
// - removed if's from doubledouble*int. Zero-branch code is 2.5 faster (tested)
// - generally cleaned up and re-organized the order of the functions,
//   now they're grouped nicely by function.
// - tested Newton's division. Works but is terribly slow, because
//   it needs to do several doubledouble + and * operations, and doubledouble /
//   without it is about the same speed at doubledouble * anyway, so no win.
// - moved recip here as an inline.
// - checked and tested doubledouble/double (BUG?), seems fine.
typedef struct doubledouble {
double hi;
double lo;
} doubledouble;
#ifdef __LCC__
#define x86_FIX \
unsigned short __old_cw, __new_cw;\
_asm("\tfnstcw\t%__old_cw");\
__new_cw = (__old_cw & ~0x300) | 0x200;\
_asm ("\tfldcw\t%__new_cw")

#define END_x86_FIX _asm ("\tfldcw\t%__old_cw")
#else
#define x86_FIX unsigned short __old_cw,__new_cw; _asm {fnstcw __old_cw}\
__new_cw = (__old_cw & ~0x300) | 0x200;\
_asm { fldcw __new_cw};
#define END_x86_FIX _asm { fldcw __new_cw}
#endif
#ifdef GCC
#undef x86_FIX
#undef END_x86_FIX
#define x86_FIX
#define END_x86_FIX
#endif

static double Split = 134217729.0;
doubledouble new_doubledouble(double x,double y);

doubledouble operator-(const doubledouble &x)
{
doubledouble z;

z.hi = -x.hi;
z.lo = -x.lo;
return z;
}

// Absolute value
doubledouble ddfabs(const doubledouble& x)
{
if (x.hi>=0.0) return x;
else {
doubledouble result;

  result.hi = -x.hi;
  result.lo = -x.lo;
  return result;
}
}

// Addition
/*      (C) W. Kahan 1989
*       NOTICE:
*       Copyrighted programs may not be translated, used, nor
*       reproduced without the author's permission. Normally that
*       permission is granted freely for academic and scientific
*       purposes subject to the following three requirements:
*       1. This NOTICE and the copyright notices must remain attached
*          to the programs and their translations.
*       2. Users of such a program should regard themselves as voluntary
*          participants in the author's researches and so are obliged
*          to report their experience with the program back to the author.
*       3. Neither the author nor the institution that employs him will
*          be held responsible for the consequences of using a program
*          for which neither has received payment.
*       Would-be commercial users of these programs must correspond
*       with the author to arrange terms of payment and warranty.
*/

///////////////////////////////////////////////////////////////
/////////////////// Addition and Subtraction /////////////////
///////////////////////////////////////////////////////////////

// Binary addition
doubledouble operator +(const doubledouble& x,const doubledouble& y)
{
double H, h, T, t, S, s, e, f;
doubledouble z;
x86_FIX;
S = x.hi+y.hi;
T = x.lo+y.lo;
e = S-x.hi;
f = T-x.lo;
// s = S-e;
t = T-f;
s = (y.hi-e)+(x.hi-(S-e));
t = (y.lo-f)+(x.lo-(T-f));
e = s+T;
H = S+e;
h = e+(S-H);
e = t+h;
z.hi = H+e;
z.lo = e+(double)(H-z.hi);
END_x86_FIX;
return z;
}

doubledouble operator +(const double& x,const doubledouble& y)
{
double H, h, S, s, e;
doubledouble z;
x86_FIX;
S = x+y.hi;
e = S-x;
s = S-e;
s = (y.hi-e)+(x-s);
H = S+(s+y.lo);
h = (s+y.lo)+(S-H);
z.hi = H+h;
z.lo = h+(double)(H-z.hi);
END_x86_FIX;
return z;
}
doubledouble operator +(const doubledouble& x,const double& y )
{
return y+x;
}
doubledouble operator +(int x,const doubledouble& y)
{
return (double)(x)+y;
}
doubledouble operator +(doubledouble& x, int y) {
return y+x;
}

// Subtraction
doubledouble operator -(const doubledouble& x,const doubledouble& y)
{
return x+(-y);
}
doubledouble operator -(const double& x, const doubledouble& y)
{
return x+(-y);
}
doubledouble operator -(const int x, const doubledouble& y)
{
return x+(-y);
}
doubledouble operator -(const doubledouble& x, const double& y)
{
return x+(-y);
}
doubledouble operator -(const doubledouble& x,const int y)
{
return x+(-y);
}

////////////////////////// Self-Addition ///////////////////////
doubledouble& operator +=(doubledouble &This,const doubledouble& y)
{
double H, h, T, t, S, s, e, f;
x86_FIX;
S =This.hi+y.hi;
T = This.lo+y.lo;
e = S-This.hi;
f = T-This.lo;
s = S-e;
t = T-f;
s = (y.hi-e)+(This.hi-s);
t = (y.lo-f)+(This.lo-t);
f = s+T;
H = S+f;
h = f+(S-H);
This.hi = H+(t+h);
This.lo = (t+h)+(double)(H-This.hi);
END_x86_FIX;
return This;
}

doubledouble& operator +=(doubledouble &This,const double& y)
{
double H, h, S, s, e, f;
x86_FIX;

S =This.hi+y;
e = S-This.hi;
s = S-e;
s = (y-e)+(This.hi-s);
f = s+This.lo;
H = S+f;
h = f+(S-H);
This.hi = H+h;
This.lo = h+(double)(H-This.hi);
END_x86_FIX;
return This;
}
doubledouble& operator +=(doubledouble &This,const int y)
{
return This += (double)(y);
}

// Self-Subtraction
doubledouble& operator -=(doubledouble &This,const doubledouble& y)
{
return This += -y;
}
doubledouble& operator -=(doubledouble &This,const double& y)
{
return This += -y;
}
doubledouble& operator -=(doubledouble &This,const int y)
{
return This += -y;
}

/////////////////////////////////////////////////////////////
//////////////////// Multiplication ///////////////////////
/////////////////////////////////////////////////////////////

// Binary Multiplication
doubledouble operator *(const doubledouble& x,const doubledouble& y)
{
double hx, tx, hy, ty, C, c;
doubledouble z;
x86_FIX;

C = Split*x.hi;
hx = C-x.hi;
c = Split*y.hi;
hx = C-hx;
tx = x.hi-hx;
hy = c-y.hi;
C = x.hi*y.hi;
hy = c-hy;
ty = y.hi-hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(x.hi*y.lo+x.lo*y.hi);
z.hi = C+c;
hx=C-z.hi;
z.lo = c+hx;
END_x86_FIX;
return z;
}

// double*doubledouble
doubledouble operator *(const double& x, const doubledouble& y)
{
double hx, tx, hy, ty, C, c;
doubledouble z;
x86_FIX;
C = Split*x;
hx = C-x;
c = Split*y.hi;
hx = C-hx ;
tx = x-hx;
hy = c-y.hi;
C = x*y.hi;
hy = c-hy;
ty = y.hi - hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+x*y.lo;
z.hi = C+c;
z.lo = c+(double)(C-z.hi);
END_x86_FIX;
return z;
}

doubledouble operator *(int x, const doubledouble& y )
{
return ((const double)(x))*y;
}
doubledouble operator *(const doubledouble& x,const double& y )
{
return y*x;
}
doubledouble operator *(const doubledouble& x,const int y )
{
return y*x;
}

// Self-multiplication
doubledouble& operator *=(doubledouble &This,const doubledouble& y )
{
double hx, tx, hy, ty, C, c;
x86_FIX;
C = Split*This.hi;
hx = C-This.hi;
c = Split*y.hi;
hx = C-hx;
tx =This.hi-hx;
hy = c-y.hi;
C =This.hi*y.hi;
hy = c-hy;
ty = y.hi-hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(This.hi*y.lo+This.lo*y.hi);
hx = C+c;
This.hi = hx;
This.lo = c+(double)(C-hx);
END_x86_FIX;
return This;
}

doubledouble& operator *=(doubledouble &This,const double& y )
{
double hx, tx, hy, ty, C, c;
x86_FIX;
C = Split*This.hi;
hx = C-This.hi;
c = Split*y;
hx = C-hx;
tx =This.hi-hx;
hy = c-y;
C =This.hi*y;
hy = c-hy;
ty = y-hy;
c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(This.lo*y);
hx = C+c;
This.hi = hx;
This.lo = c+(double)(C-hx);
END_x86_FIX;
return This;
}

doubledouble& operator *=(doubledouble &This,const int y )
{
return This *= (const double)(y);
}

////////////////////////////////////////////////////////////////
////////////////////////// Division //////////////////////////
////////////////////////////////////////////////////////////////

// Reciprocal
doubledouble recip(const doubledouble& y) {
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;
x86_FIX;

C = 1.0/y.hi;
c = Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C*y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((1.0-U)-u))-C*y.lo)/y.hi;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}

doubledouble operator /(const doubledouble& x,const doubledouble& y )
{
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;
x86_FIX;
C = x.hi/y.hi;
c =Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C * y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((x.hi-U)-u)+x.lo)-C*y.lo)/y.hi;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}

// double/doubledouble:
doubledouble operator /(const double& x,const doubledouble& y )
{
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;
x86_FIX;
C = x/y.hi;
c = Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C*y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((x-U)-u))-C*y.lo)/y.hi;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}

doubledouble operator /(const doubledouble& x,const double& y )
{
double hc, tc, hy, ty, C, c, U, u;
doubledouble z;

x86_FIX;
C = x.hi/y;
c = Split*C;
hc = c-C;
u = Split*y;
hc = c-hc;
tc = C-hc;
hy = u-y;
U = C*y;
hy = u-hy;
ty = y-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = (((x.hi-U)-u)+x.lo)/y;
z.hi = C+c;
z.lo = (double)(C-z.hi)+c;
END_x86_FIX;
return z;
}

// doubledouble/int
doubledouble operator /(const doubledouble& x,const int y)
{
return x/(const double)(y);
}
doubledouble operator /(const int x,const doubledouble& y)
{
return (const double)(x)/y;
}

// Self-division.
doubledouble& operator /=(doubledouble &This,const doubledouble& y)
{
double hc, tc, hy, ty, C, c, U, u;
x86_FIX;
C =This.hi/y.hi;
c = Split*C;
hc =c-C;
u = Split*y.hi;
hc = c-hc;
tc = C-hc;
hy = u-y.hi;
U = C * y.hi;
hy = u-hy;
ty = y.hi-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = ((((This.hi-U)-u)+This.lo)-C*y.lo)/y.hi;
u = C+c;
This.hi = u;
This.lo = (double)(C-u)+c;
END_x86_FIX;
return This;
}

doubledouble& operator /=(doubledouble &This,const double& y)
{
double hc, tc, hy, ty, C, c, U, u;
x86_FIX;

C =This.hi/y;
c = Split*C;
hc =c-C;
u = Split*y;
hc = c-hc;
tc = C-hc;
hy = u-y;
U = C * y;
hy = u-hy;
ty = y-hy;
u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty;
c = (((This.hi-U)-u)+This.lo)/y;
u = C+c;
This.hi = u;
This.lo = (double)(C-u)+c;
END_x86_FIX;
return This;
}

doubledouble& operator /=(doubledouble &This,const int y)
{
return (This) /=(const double)(y);
}
doubledouble Qcopysign(const doubledouble& x,const double y)
{
if (y>=0.0) return ddfabs(x);
else return -ddfabs(x);
}

doubledouble atodd(const char *s)
{
doubledouble result = {
0.0,0.0 };
int n, sign, ex = 0;
/* eat whitespace */
while (*s == ' ' || *s == '\t' || *s == '\n') s++;
switch (*s) { // get sign of mantissa

case '-':
  {
   sign = -1;
   s++;
   break;
  }
case '+':
  s++; // no break

default:
  sign = 1;
}
/* get digits before decimal point */
while (n=(*s++)-'0', n>=0 && n<10) result = 10.0*result+n;
s--;
if (*s == '.')   /* get digits after decimal point */ {
  s++;
  while (n=(*s++)-'0', n>=0 && n<10) {
   result = 10.0*result+n;
   --ex;
  }
  s--;
}
if (*s == 'e' || *s == 'E') /* get exponent */ ex+=atoi(++s);
/* exponent adjustment */
while (ex-- > 0) result *= 10.0;
while (++ex < 0) result /= 10.0;
return (sign>=0) ? result : -result;
}
// Square (faster than x*x)
doubledouble sqr(const doubledouble& x)
{
double hx,tx,C,c;
doubledouble r;
x86_FIX;
C=Split*x.hi;
hx=C-x.hi;
hx=C-hx;
tx=x.hi-hx;
C=x.hi*x.hi;
c=((((hx*hx-C)+2.0*hx*tx))+tx*tx)+2.0*x.hi*x.lo;
hx=C+c;
r.hi=hx;
r.lo = c+(C-hx);
END_x86_FIX;
return r;
}
extern double pow(double,double);
// doubledouble^int
doubledouble powint(const doubledouble& u, int c)
{
doubledouble result;
if (c<0) return recip(powint(u,-c));
switch (c) {
case 0:
  result.hi = result.lo = 0.0;
  if (u.hi == 0.0)
   result.lo = pow(0.0,0.0);
  else
      result.lo = 1.0;
  return result;
  // let math.h handle 0^0.
case 1:
  return u;
case 2:
  return sqr(u);
case 3:
  return sqr(u)*u;
default:
  { // binary method

   int n=c, m;
   doubledouble y={
    1.0,0.0      }
   , z=u;
   if (n<0) n=-n;
   while (1) {
    m=n;
    n/=2;
    if (n+n!=m) { // if m odd

     y*=z;
     if (0==n) return y;
    }
    z=sqr(z);
   }
  }
}
/* not reached */
return u;
}

doubledouble new_doubledouble(double x,double y)
{
doubledouble result;

x86_FIX;
result.hi = x+y;
result.lo = y+(x-result.hi);
END_x86_FIX;
return result;
}

// Floor. See Graham, Knuth and Patashnik `Concrete Mathematics' page 70.
// Greatest integer <= x
// maybe floor_s is better?
doubledouble ddfloor(const doubledouble& x)
{
double fh=floor(x.hi), fl=floor(x.lo);
double t1=x.hi-fh;
double t2=x.lo-fl;
double t3=t1+t2;
int t;
t3 = x.hi-fh+x.lo-fl;
t=(int)(floor(t3));
switch (t) {
case 0:
  return new_doubledouble(fh,0.0)+new_doubledouble(fl,0.0);
case 1:
  return new_doubledouble(fh,0.0)+new_doubledouble(fl+1,0.0);
  // case 2 can only arise when fp({hi})<=1, fp({lo})<=1, but

fp({hi}+{lo})=2
case 2:
return new_doubledouble(fh,0.0)+new_doubledouble(fl+1.0,0.0);
}
// never get here
return new_doubledouble(0.0,0.0);
}
// Signum
int sign(doubledouble& x)
{
if (x.hi>0.0) return 1;
else if (x.hi<0.0) return -1;
else return 0;
}

char *doubledoubletoa(doubledouble &x,char *s)
{
int Digits;
doubledouble ten = new_doubledouble(10.0,0.0),l,y,z;
double q;
int m,n,d,i;
if (x.hi==0.0)   {
  strcpy(s, "0.0 ");
  return s;
}
if (x.hi!=x.hi) {
  strcpy(s , "NaN ");
  return s;
}
Digits=34;
y=ddfabs(x);
q=log10(y.hi);
n=(int)(floor(q));
if (n<0) n++;
if (n == 0)
  l = new_doubledouble(1.0,0.0);
else
     l = powint(ten,n);
y = y/l;
if (sign(x)<0) *s++ = '-'; //else s<<" ";

d = Digits>34 ? 34 : Digits;
d = d<3 ? 3 : d;
for (i=1; i<=d; i++) {
  if (2==i) *s++ = '.';
  m = (int)(ddfloor(y).hi);
  if (m<0 || m>9) {
   // fprintf(stderr,
//  "Internal error in doubledouble.cc: digit=%d",m);
     }
  sprintf(s,"%d",m);
  s += strlen(s);
  //    z.lo = 0.0;
//    z.hi = (double)m;
z = new_doubledouble((double)m,0.0);
  y = (y-z)*ten;
  if (y.hi<=0.0) break; // x must be an integer

  //if (y.hi==0.0) break; // ???????????
  }
if (n!=0) {
  *s++ = 'e';
  sprintf(s,"%d",n);
  s += strlen(s);
}
else *s++ = 0;
return s;
}
int operator> (const doubledouble& x, const doubledouble& y)
{
return (x.hi> y.hi) || (x.hi==y.hi && x.lo> y.lo);
}
int operator>=(const doubledouble& x, const doubledouble& y)
{
return (x.hi>y.hi) || (x.hi==y.hi && x.lo>=y.lo);
}
int operator< (const doubledouble& x, const doubledouble& y)
{
return (x.hi< y.hi) || (x.hi==y.hi && x.lo< y.lo);
}
int operator<=(const doubledouble& x, const doubledouble& y)
{
return (x.hi<y.hi) || (x.hi==y.hi && x.lo<=y.lo);
}
int operator==(const doubledouble& x, const doubledouble& y)
{
return x.hi==y.hi && x.lo==y.lo;
}
int operator!=(const doubledouble& x, const doubledouble& y)
{
return x.hi!=y.hi || x.lo!=y.lo;
}

struct ddConstants {
doubledouble Log2;
doubledouble Log10;
doubledouble Pi;
doubledouble TwoPi;
doubledouble Pion2;
doubledouble Pion4;
doubledouble _Pi;
} ddconst;

void InitConstants(void)
{
ddconst.Log2 =atodd("0.6931471805599453094172321214581765680755");
ddconst.Log10=atodd("2.302585092994045684017991454684364207601");
ddconst.Pi =atodd("3.1415926535897932384626433832795028841972");
ddconst.TwoPi=atodd("6.2831853071795864769252867665590057683943");
ddconst.Pion2=atodd("1.5707963267948966192313216916397514420985");
ddconst.Pion4=atodd("0.7853981633974483096156608458198757210493");
ddconst._Pi =atodd("0.3183098861837906715377675267450287240689");
}

doubledouble ddldexp(const doubledouble x, const int exp)
{ // x*2^exp

return new_doubledouble(ldexp(x.hi,exp),ldexp(x.lo,exp));
}
// Another floor. V. Shoup 97 Sep 23...
doubledouble ddfloor_s(const doubledouble& x)
{
double fhi=floor(x.hi);
if (fhi!=x.hi)
return new_doubledouble(fhi,0.0);
else
return new_doubledouble(fhi,floor(x.lo));
}

// rint (round to nearest int)
doubledouble ddrint(const doubledouble& x)
{
const doubledouble &rx = x+new_doubledouble(0.5,0.0);
return ddfloor(rx);
}

doubledouble ddexp(const doubledouble& x)
{
/* Uses some ideas by Alan Miller
Method:
x x.log2(e) nint[x.log2(e)] + frac[x.log2(e)]
e = 2 = 2

iy    fy
= 2   . 2
Then
fy    y.loge(2)
2   = e

Now y.loge(2) will be less than 0.3466 in absolute value.
This is halved and a Pade' approximant is used to approximate e^x over
the region (-0.1733, +0.1733). This approximation is then squared.
*/
int iy;
doubledouble y,temp,ysq,sum1,sum2;

if (x.hi<-744.4400719213812) return new_doubledouble(0.0,0.0); //

exp(x)<1e-300

y=x/ddconst.Log2;
iy = (int)(ddrint(y)).hi;
temp=new_doubledouble((double)iy,0.0);
y=(y-temp)*ddconst.Log2;
y=ddldexp(y,-1);
ysq=sqr(y);
sum1=y*((((ysq+3960.)*ysq+2162160.)*ysq+302702400.)*ysq+8821612800.);
sum2=(((90.*ysq+110880.)*ysq+30270240.)*ysq+2075673600.)*ysq+17643225600.;
/*
sum2 + sum1 2.sum1
Now approximation = ----------- = 1 + ----------- = 1 + 2.temp
sum2 - sum1 sum2 - sum1

Then (1 + 2.temp)^2 = 4.temp.(1 + temp) + 1
*/
temp=sum1/(sum2-sum1);
y=temp*(temp+1);
y=ddldexp(y,2);
return ddldexp(y+1,iy);
}

// Natural logarithm
doubledouble ddlog(const doubledouble& x)
{ // Newton method

doubledouble s,e;
if (x.hi<0.0) {
fprintf(stderr,"\ndoubledouble: attempt to take log of negative

argument!\n");
  return new_doubledouble(0.0,0.0);
}
if (x.hi==0.0) {
  fprintf(stderr,"\ndoubledouble: attempt to take log(0)!\n");
  return new_doubledouble(0.0,0.0);
}
s=new_doubledouble(log(x.hi),0.0), e=ddexp(s); // s=double approximation to

result

return s+(x-e)/e; // Newton correction, good enough

//doubledouble d=(x-e)/e; return s+d-0.5*sqr(d); // or 2nd order correction
}

void base_and_prec(void) { // Linnainmaa ACM TOMS 7, 272 Thm 3

int p;
x86_FIX;
printf("Base and precision determination by Linnainmaa's method:\n");
{
  double U,R,u,r,beta;
  U=4.0/3.0;
  U-=1;
  U*=3;
  U-=1;
  U=fabs(U);
  R=U/2+1;
  R-=1;
  if (R!=0.0) U=R;
  u=2;
  u/=3;
  u-=0.5;
  u*=3;
  u-=0.5;
  u=fabs(u);
  r=u/2+0.5;
  r-=0.5;
  if (r!=0.0) u=r;
  beta=U/u;
  p=(int)(-log(u)/log(beta)+0.5);
  printf("Type double: base is %g, precision %d\n",beta,p);
}
{
  doubledouble ddU,ddR,u,r,beta;
  ddU=new_doubledouble(4.0,0.0);
  ddU/=new_doubledouble(3.0,0.0);
  ddU-=1;
  ddU*=3;
  ddU-=1;
  ddU=ddfabs(ddU);
  ddR=ddU/2+1;
  ddR-=1;
  if (ddR.hi!=0.0) ddU=ddR;
  u=new_doubledouble(2.0,0.0);
  u/=3.0;
  u-=0.5;
  u*=3;
  u-=0.5;
  u=ddfabs(u);
  r=u/2+0.5;
  r-=0.5;
  if (r.hi!=0.0) u=r;
  beta=ddU/u;
  p=(int)(-ddlog(u)/ddlog(beta)+0.5).hi;
  printf("Type doubledouble:   base is %g, precision %d\n",beta.hi,p);
}
END_x86_FIX;
}
// Assumption: if it works, don't even bother doing output
int Verify(char msg[], doubledouble correct, doubledouble computed)
{
// default fractional error allowed.
doubledouble delta = new_doubledouble(1.1e-31,0.0);
if(correct == computed)
  return 1;
else if(correct.hi == 0.0) // can't do a relative test
{
error:
  printf("%s", msg);
  printf(" corret.hi %g correct.lo %g", correct.hi,correct.lo);
  printf("\ngot hi %g, lo %g\n",computed.hi,computed.lo);
  return 0;
}
else if(ddfabs((correct-computed)/correct) <= delta)
  return 1;
else goto error;
}

int main(void)
{
doubledouble a = new_doubledouble(5.0,0.0);
doubledouble b = new_doubledouble(4.0,0.0);
doubledouble c = a*b;
double x1,x2;
char buffer[256];
int i;

InitConstants();
base_and_prec();
doubledoubletoa(ddconst.Log2,buffer);
printf("%s\n",buffer);
x1=(pow(2,53)-2)/pow(2,53);
x2=(3.0)/pow(2,55);
Verify("Test -2: should be 0: ",
  new_doubledouble(0.0,0.0),
  ddfloor(new_doubledouble(x1,0.0)+new_doubledouble(x2,0.0)));
Verify("Test -1: should be 0: ", new_doubledouble(0.0,0.0),
      ddfloor(atodd("0.9999999999999997779553950749686919152737")
  +

atodd("0.00000000000000008326672684688674053177237510681152343750")));
for (i=0; i<5;i++) {
  c = c*c;
  doubledoubletoa(c,buffer);
  printf("%s\n",buffer);
  a = c - 1.0;
  b = new_doubledouble(-1.0,0.0);
  a = c + b;
  doubledoubletoa(a,buffer);
  printf("%s\n",buffer);
}
return 0;
}

相关文章：

相关软件：