c - cでビットセット/バイト配列変換を行う方法

Question

与えられた配列 unsigned char q[32]="1100111...",

このビットセットのビットが配列内の値と等しくなるように、4 バイトのビットセット unsigned char p[4] を生成するにはどうすればよいですか。たとえば、1 バイト目は p[0] = "q[0] ... q[7]"、2 バイト目は p[1] = "q[8] ... q[15]" のようになります。

また、逆にそれを行う方法、つまり、ビットセットを指定して配列を生成する方法は?

私自身の最初の部分の試行。

/* Pack the '0'/'1' characters of q into the bits of p, most significant bit
 * first: q[0] lands in bit 7 of p[0], q[8] in bit 7 of p[1], and so on.
 * p has 4 bytes, so it can hold at most 32 input characters. */
unsigned char p[4]={0};
for (int j=0; j<N; j++) 
{
    /* Only '1' sets a bit; any other character leaves the bit at 0. */
    if (q[j] == '1')
    {
        /* j / 8 selects the output byte, 7 - (j % 8) the bit inside it. */
        p [j / 8] |= 1 << (7-(j % 8)); 
    }            
}

上記は正しいですか？確認する条件はありますか？もっと良い方法はありますか？

編集 - 1

上記が効率的な方法なのだろうか？配列のサイズは最大 4096 またはそれ以上になる可能性があるためです。

score 3 · Accepted Answer

まず、strtoul を使用して 32 ビット値を取得します。次に、htonl でバイトオーダーをビッグエンディアンに変換します。最後に、結果を配列に保存します。

#include <arpa/inet.h>
#include <stdlib.h>

/* ... */
/* Parse the '0'/'1' text as a base-2 number, convert the value to big-endian
 * (network) byte order, and store it into the byte array. */
unsigned char q[32] = "1100111...";
unsigned char result[4] = {0};
/* NOTE(review): the store below goes through an unsigned long lvalue, so it
 * writes sizeof(unsigned long) bytes into a 4-byte array; confirm that
 * unsigned long is 4 bytes on the target before relying on this. */
*(unsigned long*)result = htonl(strtoul(q, NULL, 2));

他の方法もあります。

でも `<arpa/inet.h>` がありません！

その場合は、プラットフォームのバイト順を知る必要があります。ビッグエンディアンの場合、htonl は何もしないので省略できます。リトルエンディアンの場合、htonl は次のようになります。

/* Reverses the order of the four low bytes of x, converting a 32-bit value
 * between little-endian host order and big-endian network order.
 *
 * The swap is done in two steps: first the bytes inside each 16-bit half are
 * exchanged, then the two 16-bit halves are exchanged. Any bits of x above
 * bit 31 are dropped by the masks.
 *
 * Returns the byte-swapped value.
 */
unsigned long htonl(unsigned long x)
{
    /* Each side of the | must be parenthesised as a whole: mask, then shift. */
    x = ((x & 0xFF00FF00UL) >> 8) | ((x & 0x00FF00FFUL) << 8);
    x = ((x & 0xFFFF0000UL) >> 16) | ((x & 0x0000FFFFUL) << 16);
    return x;
}

運が良ければ、オプティマイザーがユーザーの作業を認識して、効率的なコードに変換する可能性があります。そうでない場合は、少なくともレジスタと O(log N) ですべて実装可能です。

プラットフォームのバイト順がわからない場合は、それを検出する必要があります。

/* Overlays an int with its individual bytes so that the host byte order can
 * be probed at run time. */
typedef union {
    char c[sizeof(int) / sizeof(char)];
    int i;
} OrderTest;

/* Converts a 32-bit value from host byte order to big-endian network order,
 * detecting the host byte order at run time.
 *
 * On a host where the first byte of the int 1 is zero (big-endian) the value
 * is returned unchanged. Otherwise the four low bytes of x are reversed;
 * any bits of x above bit 31 are dropped by the masks.
 */
unsigned long htonl(unsigned long x)
{
    OrderTest test;
    test.i = 1;
    /* The lowest-addressed byte is 0 only when the least significant byte is
     * stored last, i.e. the host is already big-endian. */
    if(!test.c[0])
        return x;

    /* Swap the bytes inside each 16-bit half, then swap the halves. */
    x = ((x & 0xFF00FF00UL) >> 8) | ((x & 0x00FF00FFUL) << 8);
    x = ((x & 0xFFFF0000UL) >> 16) | ((x & 0x0000FFFFUL) << 16);
    return x;
}

`long` は 8 バイトかもしれません！

さて、OPは配列サイズで4バイトの入力を暗示していましたが、8バイトlongは実行可能です:

/* Number of chars in a long. The input holds 8 characters per such char, and
 * the output holds one byte per char of a long. */
#define kCharsPerLong (sizeof(long) / sizeof(char))
unsigned char q[8 * kCharsPerLong] = "1100111...";
unsigned char result[kCharsPerLong] = {0};
/* NOTE(review): result is declared as an unsigned char array but is written
 * through an unsigned long lvalue; confirm that the target tolerates this
 * access (alignment and aliasing) before reusing the pattern. */
*(unsigned long*)result = htonl(strtoul(q, NULL, 2));

#include <limits.h>

/* Reverses the byte order of an unsigned long, for either a 4-byte or an
 * 8-byte unsigned long.
 *
 * The word size is selected with ULONG_MAX because the preprocessor cannot
 * evaluate sizeof inside #if. Each step swaps neighbouring groups of twice
 * the previous width (bytes, then 16-bit groups, then 32-bit groups), so the
 * number of steps grows with the logarithm of the word size.
 *
 * Returns the byte-swapped value. Compilation fails on any other word size.
 */
unsigned long htonl(unsigned long x)
{
#if ULONG_MAX == 0xFFFFFFFFUL
    x = ((x & 0xFF00FF00UL) >> 8) | ((x & 0x00FF00FFUL) << 8);
    x = ((x & 0xFFFF0000UL) >> 16) | ((x & 0x0000FFFFUL) << 16);
#elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFUL
    x = ((x & 0xFF00FF00FF00FF00UL) >> 8) | ((x & 0x00FF00FF00FF00FFUL) << 8);
    x = ((x & 0xFFFF0000FFFF0000UL) >> 16) | ((x & 0x0000FFFF0000FFFFUL) << 16);
    x = ((x & 0xFFFFFFFF00000000UL) >> 32) | ((x & 0x00000000FFFFFFFFUL) << 32);
#else
#error Unsupported word size.
#endif
    return x;
}

char が 8 ビットでない場合 (DSP ではよくあることです) は、自分で対処する必要があります。(SHARC シリーズの DSP が 8 ビットのバイトを採用したことが大きな出来事だったのは、このためです。これにより、既存のコードの移植がはるかに簡単になりました。正直なところ、C の移植性サポートはひどいものだからです。)

任意の長さのバッファはどうですか? おかしなポインタ型キャストはやめてください。

OP のバージョンで改善できる主な点は、ループの内部を再考することです。出力バイトを固定データレジスタと考える代わりに、連続する各ビットが右端 (LSB) にシフトインされるシフトレジスタと考えてください。これにより、あの除算と剰余演算 (うまくいけばビットシフトに最適化されるものですが) がすべて不要になります。

正気を保つために、unsigned char をやめて uint8_t を使います。

#include <stdint.h>

#include <stddef.h>

/* Converts the string of '1' and '0' characters in `inChars` to a buffer of
 * bytes in `outBytes`, most significant bit first.
 *
 * inChars   - characters to parse; parsing stops at the first character that
 *             is neither '0' nor '1' (including the terminating NUL).
 * outBytes  - destination buffer.
 * numBytes  - number of available bytes in `outBytes`; parsing also stops
 *             once this many full bytes have been written.
 * bytesRead - if not NULL, receives the number of bytes written (rounding up
 *             to the nearest full byte).
 *
 * If a multiple of 8 bits is not read, the last byte written is padded with
 * 0 bits to reach a multiple of 8 bits. Returns the number of padding bits
 * that were added. For example, an input of 11 bits sets `*bytesRead` to 2
 * and returns 5. A nonzero return value therefore means that a partial byte
 * was read, which may be an error.
 */
unsigned StringToBits(const char* inChars, uint8_t* outBytes, size_t numBytes,
    size_t* bytesRead)
{
    size_t bytes = 0;
    unsigned bits = 0;
    uint8_t x = 0;

    while(bytes < numBytes)
    {
        /* Parse a character, shifting the new bit in at the LSB. */
        switch(*inChars++)
        {
            case '0': x = (uint8_t)(x << 1); ++bits; break;
            case '1': x = (uint8_t)((x << 1) | 1); ++bits; break;
            /* A break here would only leave the switch, so the loop is ended
             * by making its condition false instead. */
            default: numBytes = 0; break;
        }

        /* See if we filled a byte. */
        if(bits == 8)
        {
            outBytes[bytes++] = x;
            x = 0;
            bits = 0;
        }
    }

    /* Padding, if needed: left-align the bits collected so far. */
    if(bits)
    {
        bits = 8 - bits;
        outBytes[bytes++] = (uint8_t)(x << bits);
    }

    /* Finish up. */
    if(bytesRead)
        *bytesRead = bytes;
    return bits;
}

inChars が null 終端されていることを確認するのは呼び出し側の責任です。この関数は、'0' または '1' 以外の最初の文字が検出された場合、または出力バッファが不足した場合に戻ります。使用例:

unsigned char q[32] = "1100111...";
uint8_t buf[4];
/* Start from a value that fails the check below, so the error branch is
 * taken unless the call really reports 4 bytes. */
size_t bytesRead = 5;
/* q is an unsigned char array while StringToBits takes const char*, so the
 * conversion is spelled out with a cast. */
if(StringToBits((const char*)q, buf, 4, &bytesRead) || bytesRead != 4)
{
    /* Partial read; handle error here. */
}

これは 4 バイトを読み取るだけで、読み取れない場合はエラーをトラップします。

unsigned char q[4096] = "1100111...";
uint8_t buf[512];
StringToBits(q, buf, 512, NULL);

これは、変換できるものを変換し、残りを 0 ビットに設定するだけです。

C に、複数レベルのループや switch から break で抜け出す機能があれば、この関数はもっとうまく書けます。現状では、同じ効果を得るにはフラグ値を追加する必要がありますが、それは煩雑です。あるいは goto を追加する必要があります。

score 2 · Accepted Answer

それではうまくいかないと思います。各「ビット」を 1 と比較していますが、実際には '1' と比較すべきです。また、if を取り除くことで、もう少し効率的にすることもできます。

/* Pack 32 '0'/'1' characters from q into p, most significant bit first. */
unsigned char p[4]={0};
for (int j=0; j<32; j++)
{
    /* The comparison yields 0 or 1, which is shifted straight into place;
     * the character constant needs single quotes, not backticks. */
    p [j / 8] |= (q[j] == '1') << (7-(j % 8));
}

逆に行くのもとても簡単です。前に設定した「ビット」ごとにマスクするだけです。

/* Expand the 32 bits of p back into '0'/'1' characters in q, most
 * significant bit first. q receives no terminating NUL. */
unsigned char q[32]={0};
for (int j=0; j<32; j++) {
  /* + binds tighter than &, so the mask test must be parenthesised; !!
   * collapses the masked value to 0 or 1 before '0' is added. */
  q[j] = !!(p[j / 8] & (1 << (7-(j % 8)))) + '0';
}

1/0 と '1'/'0' の間で変換するために、(boolean) + '0' をクリエイティブに使っていることに気付くでしょう。

score 1 · Accepted Answer

極端な効率を求めている場合は、次の手法を使用してみてください。

if を '0' の減算で置き換えます（入力記号は 0 または 1 のみであると想定できるようです）。また、入力を低いインデックスから高いインデックスへ順に処理します。

/* Build each output byte from 8 consecutive input characters, most
 * significant bit first. Every pass reads q[c] .. q[c + 7], so N is expected
 * to be a multiple of 8. */
for (int c = 0; c < N; c += 8)
{
    int y = 0;
    /* Shift the accumulated value left by one and add the next digit;
     * subtracting '0' maps '0' to 0 and '1' to 1. */
    for (int b = 0; b < 8; ++b)
        y = y * 2 + q[c + b] - '0';
    p[c / 8] = y;
}

配列インデックスを自動インクリメントポインターに置き換えます。

/* Same conversion as the indexed version, but walking the input and output
 * with pointers that advance as they are used. */
const char* qptr = q;
unsigned char* pptr = p;
for (int c = 0; c < N; c += 8)
{
    int y = 0;
    /* Consume 8 characters, shifting each digit in at the low end. */
    for (int b = 0; b < 8; ++b)
        y = y * 2 + *qptr++ - '0';
    *pptr++ = y;
}

内側のループを展開します。

/* Same conversion with the 8-step inner loop written out: each character is
 * turned into 0 or 1 and placed directly at its bit position. */
const char* qptr = q;
unsigned char* pptr = p;
for (int c = 0; c < N; c += 8)
{
    /* Precedence: - binds tighter than <<, which binds tighter than |, so
     * each line is (digit) << position, and the lines are OR-ed together. */
    *pptr++ =
        qptr[0] - '0' << 7 |
        qptr[1] - '0' << 6 |
        qptr[2] - '0' << 5 |
        qptr[3] - '0' << 4 |
        qptr[4] - '0' << 3 |
        qptr[5] - '0' << 2 |
        qptr[6] - '0' << 1 |
        qptr[7] - '0' << 0;
    /* Advance past the 8 characters just consumed. */
    qptr += 8;
}

複数の入力文字を同時に処理する (ビット操作ハックまたは MMX 命令を使用) - これには大きなスピードアップの可能性があります!

score 1 · Accepted Answer

あなたの例を見る限り、読みやすさを重視しているようには見えません。また、（遅ればせながら）ページを更新してみると、私のソリューションは Chriszuma のものと非常によく似ていました。違いは、演算子の優先順位に頼って括弧を省いていることと、0 または 1 を強制するために !! を追加していることだけです。

/* Round trip: pack the '0'/'1' string q into the bytes of p, unpack p into r,
 * then print the packed bytes in hex followed by both strings.
 *
 * N is an enum constant rather than a const variable: in C a const variable
 * is not a constant expression, so arrays sized by it would be variable-length
 * arrays, which cannot have initializers. */
enum { N = 32 }; //N must be a multiple of 8
unsigned char q[N+1] = "11011101001001101001111110000111";
unsigned char p[N/8] = {0};
unsigned char r[N+1] = {0}; //reversed

/* - and % bind tighter than <<, so the shift count is 7 - (i % 8). */
for(size_t i = 0; i < N; ++i)
    p[i / 8] |= (q[i] == '1') << 7 - i % 8;

/* << binds tighter than &, so the mask is 1 << (7 - (i % 8)); !! turns the
 * masked value into 0 or 1 before it is added to '0'. */
for(size_t i = 0; i < N; ++i)
    r[i] = '0' + !!(p[i / 8] & 1 << 7 - i % 8);

printf("%x %x %x %x\n", p[0], p[1], p[2], p[3]);
printf("%s\n%s\n", q,r);

c - cでビットセット/バイト配列変換を行う方法

4 に答える 4

でも足りない<arpa/inet.h>！

たぶんlong8バイトです！

任意の長さのバッファはどうですか? おかしなポインタ型キャストはやめてください。

Related

Reference

でも足りない`<arpa/inet.h>`！

たぶん`long`8バイトです！