c - 拡張範囲の浮動小数点型

Question

Cで非常に小さいオーダー (たとえば、 0.6745 × 2 ^-3000 ) の浮動小数点数を表す必要があります。そのようなサポートがプラットフォームに依存しない (CPU と GPU-CUDA の両方で動作する) 必要があります。有効桁数を長くする必要はありません。

高精度ライブラリ(GMP、MPFRなど)はGPU上で動かないので使えません。一方、CUDA はlong double型をサポートしていません。解決策はありますか？カスタム浮動小数点型をどうにかして実装することは可能ですか?

score 1 · Accepted Answer

対数空間で作業できます。つまり、各数値を e ^xとして表します。ここで、x は標準の浮動小数点型です。

加算/減算 (およびより一般的な合計) は、log-sum-exp トリックを使用して実行できます。つまり、
- e^x + e^y = e^x (1 + e^{y-x}) = e^{x + \log(1 + \exp(y - x))}
掛け算/割り算が足し算/引き算になる
- e^x × e^y = e^{x+y}
累乗はかなり簡単です
- (e^x)^{(e^y)} = e^{x \exp(y)}

score 0 · Accepted Answer

私は簡単な解決策を書きました（この作業を使用して）：

#include <math.h>
#include <stdint.h>

/* Largest exponent gap (in bits) for which sum() and sub() still combine  */
/* both operands; beyond it the operand with the smaller exponent is      */
/* ignored.                                                                */
/* NOTE(review): 53 presumably is the significand width of an IEEE-754    */
/* binary64 double -- confirm for the target platform.                    */
#define DOUBLE_PRECISION 53

/*  DOUBLE PRECISION FLOATING-POINT TYPE WITH EXTENDED EXPONENT     */
/*  The represented value is sig * 2^exp. The arithmetic helpers    */
/*  normalize their results through adjust()/adjusttiny().          */

typedef struct Real {
    double sig;     //significand; normalized results have |sig| in [1, 2), or sig == 0
    long exp;       //binary exponent; adjust() stores 0 here when sig == 0
} real;

/*  UNION FOR DIVISION DOUBLE BY 2^POW                              */
/*  Overlays a double with a 64-bit unsigned integer so that the    */
/*  bit pattern of the double can be read and modified directly.    */

union DubleIntUnion
{
    double      dvalue;     // floating-point view of the storage
    uint64_t    ivalue;     // the same storage viewed as raw 64-bit integer
};


/*  PLACE SIGNIFICAND OF REAL NUMBER IN RANGE [1, 2)            */
/*  Applies at most ONE halving or doubling step, so the result */
/*  is only fully normalized when |x.sig| is already in         */
/*  [0.5, 4). A zero significand forces the exponent to 0.      */

inline real adjust(real x){
    const double magnitude = fabs(x.sig);
    real out = x;

    if(x.sig == 0){
        // canonical zero: the exponent carries no information
        out.exp = 0;
        return out;
    }
    if(magnitude >= 2.0){
        // too large: halve the significand, bump the exponent
        out.sig = x.sig / 2;
        out.exp = x.exp + 1;
    } else if(magnitude < 1){
        // too small: double the significand, lower the exponent
        out.sig = x.sig * 2;
        out.exp = x.exp - 1;
    }
    return out;
}

/*  PLACE SIGNIFICAND OF REAL NUMBER IN RANGE [1, 2) FOR TINY NUMBER    */
/*  FOR EXAMPLE, AFTER SUBTRACTION OR WHEN SET REAL FROM DOUBLE         */
/*                                                                      */
/*  Fully normalizes x regardless of how far x.sig is from [1, 2):      */
/*    - finite non-zero sig: returns the same value with |sig| in       */
/*      [1, 2) and the exponent shifted accordingly;                    */
/*    - zero sig: returns sig unchanged (sign kept) with exp = 0;       */
/*    - NaN or infinite sig: returned unchanged, since no power-of-two  */
/*      rescaling can bring it into range (a halve/double loop would    */
/*      never terminate on such input).                                 */

inline real adjusttiny(real x){
    real y;
    int shift;
    double frac;

    if(!isfinite(x.sig)){
        return x;
    }
    if(x.sig == 0){
        y.sig = x.sig;
        y.exp = 0;
        return y;
    }
    // frexp yields x.sig == frac * 2^shift with |frac| in [0.5, 1);
    // doubling frac (exact) moves it into [1, 2), costing one exponent step.
    frac = frexp(x.sig, &shift);
    y.sig = frac * 2;
    y.exp = x.exp + shift - 1;
    return y;
}

/*  Build a real from a plain double: the value is stored with      */
/*  exponent 0 and then fully normalized by adjusttiny().           */
/*  NOTE(review): this shares its name with set(real); overloading  */
/*  by parameter type is only accepted by a C++/CUDA compiler.      */
real set(double x){
    real y;
    y.sig = x;
    y.exp = 0;
    return adjusttiny(y);
}

/*  Copy a real: returns a value with the same significand and      */
/*  exponent as x. No normalization is performed.                   */
/*  NOTE(review): this shares its name with set(double);            */
/*  overloading by parameter type is only accepted by a C++/CUDA    */
/*  compiler.                                                       */
real set(real x){
    return x;
}

/*  ARITHMETIC OPERATIONS   */

//divide x by 2^pow. Assert that x.exp - pow > e_min
//
// Works by subtracting pow from the biased exponent field (bits 52..62) of
// the double's bit pattern, so no floating-point division is performed.
//   x    value to scale; zero is returned unchanged because its exponent
//        field is already 0 and subtracting from it would wrap into the
//        sign bit and produce garbage.
//   pow  power of two to divide by; the caller must ensure the scaled value
//        stays a normal double (see the assertion above).
// Returns x / 2^pow.
// Declared static inline so the definition is usable as-is whether the file
// is built as C (where a bare `inline` definition emits no out-of-line copy)
// or as C++.
static inline double div2pow(const double x, const int pow)
{
    union { double d; uint64_t u; } bits;

    if (x == 0)
        return x;

    bits.d = x;
    bits.u -= (uint64_t)pow << 52;      // subtract pow from exponent
    return bits.d;
}

//summation
//
// Returns x + y as a normalized real.
//   - A zero operand is returned as the other operand unchanged. Zero is
//     stored with exp == 0, so comparing exponents would otherwise let it
//     swallow an operand whose exponent is far below 0, or feed a zero
//     significand to div2pow().
//   - The operand with the smaller exponent is scaled down to the larger
//     exponent before adding; if the gap exceeds DOUBLE_PRECISION bits it
//     cannot affect the result and is dropped.
//   - The exponent gap is kept in a long (the type of real.exp) and only
//     narrowed to int once it is known to be <= DOUBLE_PRECISION.
inline real sum(real x, real y){
    real total;
    long gap;

    if (x.sig == 0){
        return y;
    }
    if (y.sig == 0){
        return x;
    }

    if (x.exp > y.exp){
        gap = x.exp - y.exp;
        total.exp = x.exp;
        if (gap <= DOUBLE_PRECISION){
            // divide y by 2^(x.exp - y.exp)
            total.sig = div2pow(y.sig, (int)gap) + x.sig;
        } else {
            total.sig = x.sig;
        }
    } else if (y.exp > x.exp){
        gap = y.exp - x.exp;
        total.exp = y.exp;
        if (gap <= DOUBLE_PRECISION){
            // divide x by 2^(y.exp - x.exp)
            total.sig = div2pow(x.sig, (int)gap) + y.sig;
        } else {
            total.sig = y.sig;
        }
    } else {
        total.exp = x.exp;
        total.sig = x.sig + y.sig;
    }

    // Operands of opposite sign can cancel, leaving a significand far below
    // 1 (or exactly 0); a single adjust() step cannot renormalize that.
    // NaN and infinity compare false here and take the single-step path.
    if (fabs(total.sig) < 1){
        return adjusttiny(total);
    }
    return adjust(total);
}

//subtraction
//
// Returns x - y as a normalized real.
//   - If y is zero the result is x unchanged; if x is zero the result is y
//     with its significand negated. Zero is stored with exp == 0, so
//     comparing exponents would otherwise let it swallow an operand whose
//     exponent is far below 0, or feed a zero significand to div2pow().
//   - The operand with the smaller exponent is scaled down to the larger
//     exponent before subtracting; if the gap exceeds DOUBLE_PRECISION bits
//     it cannot affect the result and is dropped.
//   - The exponent gap is kept in a long (the type of real.exp) and only
//     narrowed to int once it is known to be <= DOUBLE_PRECISION.
//   - The difference may be arbitrarily small after cancellation, so it is
//     normalized with adjusttiny().
inline real sub(real x, real y){
    real diff;
    long gap;

    if (y.sig == 0){
        return x;
    }
    if (x.sig == 0){
        diff.sig = -y.sig;
        diff.exp = y.exp;
        return diff;
    }

    if (x.exp > y.exp){
        gap = x.exp - y.exp;
        diff.exp = x.exp;
        if (gap <= DOUBLE_PRECISION){
            // divide y by 2^(x.exp - y.exp)
            diff.sig = x.sig - div2pow(y.sig, (int)gap);
        } else {
            diff.sig = x.sig;
        }
    } else if (y.exp > x.exp){
        gap = y.exp - x.exp;
        diff.exp = y.exp;
        if (gap <= DOUBLE_PRECISION){
            // divide x by 2^(y.exp - x.exp)
            diff.sig = div2pow(x.sig, (int)gap) - y.sig;
        } else {
            diff.sig = -y.sig;
        }
    } else {
        diff.exp = x.exp;
        diff.sig = x.sig - y.sig;
    }
    return adjusttiny(diff);
}

//multiplication
// Returns x * y: significands are multiplied, exponents are added, and the
// result goes through a single adjust() step.
inline real mul(real x, real y){
    real out;
    out.sig = x.sig * y.sig;
    out.exp = x.exp + y.exp;
    return adjust(out);
}

//division
// Returns x / y: significands are divided, exponents are subtracted, and the
// result goes through a single adjust() step. No check is made for y.sig == 0.
inline real div(real x, real y){
    real out;
    out.sig = x.sig / y.sig;
    out.exp = x.exp - y.exp;
    return adjust(out);
}

一見すると正しく動作しています。何か見落としている点はあるでしょうか。あるいは、実装を高速化することはできるでしょうか?

このような数値に対して floor 関数や ceil 関数を実装するにはどうすればよいですか?

score 0 · Accepted Answer

非常に大きな指数が必要な場合は、対称的なレベルインデックス演算が必要になる場合があります。ただし、精度を予測するのは難しいため、補正するために LI (レベルインデックス) 値の精度を高める必要がある場合があります。精度を向上させる一般的な方法の 1 つは、CUDA でもよく使用されるdouble-double 演算です。

CUMPのような CUDA には多数の多精度ライブラリもあります。

いくつかの詳細情報:

c - 拡張範囲の浮動小数点型

3 に答える 3

Related

Reference