c - LZO をファイルストリームで動作させるにはどうすればよいですか?

Question

LZO を使用してファイルストリームを圧縮しようとしていますが、なかなかうまくいきません。具体的には、compressFileWithLzo1x 関数によって作成されたアーカイブファイルを展開するときに、セグメンテーション違反が発生します。

私のmain関数とプロトタイプの宣言は次のとおりです。

#include <stdio.h>
#include <stdlib.h>
#include "lzo/include/lzo/lzo1x.h"

#define LZO_IN_CHUNK (128*1024L)
#define LZO_OUT_CHUNK (LZO_IN_CHUNK + LZO_IN_CHUNK/16 + 64 + 3)

int compressFileWithLzo1x(const char *inFn, const char *outFn);
int extractFileWithLzo1x(const char *inFn);

/*
 * Round-trip driver: compress "test.txt" into "test.txt.lzo1x", then run the
 * extractor on the archive that was just written.
 *
 * Exits with EXIT_FAILURE as soon as either step reports a non-zero status,
 * and with 0 when both succeed. Command-line arguments are not used.
 */
int main(int argc, char **argv) {
    const char *plainPath = "test.txt";
    const char *archivePath = "test.txt.lzo1x";

    (void)argc;
    (void)argv;

    /* Short-circuit: the extractor only runs when compression succeeded. */
    if (compressFileWithLzo1x(plainPath, archivePath) != 0 ||
        extractFileWithLzo1x(archivePath) != 0) {
        return EXIT_FAILURE;
    }

    return 0;
}

これが私の圧縮関数の実装です：

int compressFileWithLzo1x(const char *inFn, const char *outFn) {

    FILE *inFnPtr = fopen(outFn, "r");
    FILE *outFnPtr = fopen(outFn, "wb");
    int compressionResult;
    lzo_bytep in;
    lzo_bytep out;
    lzo_voidp wrkmem;
    lzo_uint out_len;
    size_t inResult;

    if (lzo_init() != LZO_E_OK)
        return -1;

    in = (lzo_bytep)malloc(LZO_IN_CHUNK);
    out = (lzo_bytep)malloc(LZO_OUT_CHUNK);
    wrkmem = (lzo_voidp)malloc(LZO1X_1_MEM_COMPRESS);

    do { 
        inResult = fread(in, sizeof(lzo_byte), LZO_IN_CHUNK, inFnPtr);
        if (inResult == 0)
            break;
        compressionResult = lzo1x_1_compress(in, LZO_IN_CHUNK, out, &out_len, wrkmem);
        if ((out_len >= LZO_IN_CHUNK) || (compressionResult != LZO_E_OK))
            return -1;
        if (fwrite(out, sizeof(lzo_byte), (size_t)out_len, outFnPtr) != (size_t)out_len || ferror(outFnPtr))
            return -1;
        fflush(outFnPtr);
    } while (!feof(inFnPtr) && !ferror(inFnPtr));

    free(wrkmem);
    free(out);
    free(in);
    fclose(inFnPtr);
    fclose(outFnPtr);

    return 0;
}

これが私の解凍関数の実装です：

int extractFileWithLzo1x(const char *inFn) {

    FILE *inFnPtr = fopen(inFn, "rb");
    lzo_bytep in = (lzo_bytep)malloc(LZO_IN_CHUNK);
    lzo_bytep out = (lzo_bytep)malloc(LZO_OUT_CHUNK);
    int extractionResult; 
    size_t inResult;
    lzo_uint new_length;

    if (lzo_init() != LZO_E_OK)
        return -1;

    do {
        new_length = LZO_IN_CHUNK;
        inResult = fread(in, sizeof(lzo_byte), LZO_IN_CHUNK, inFnPtr);
        extractionResult = lzo1x_decompress(out, LZO_OUT_CHUNK, in, &new_length, NULL);
        if ((extractionResult != LZO_E_OK) || (new_length != LZO_IN_CHUNK))
            return -1;
        fprintf(stderr, "out: [%s]\n", (unsigned char *)out);
    } while (!feof(inFnPtr) && (!ferror(inFnPtr));

    free(in);
    free(out);
    fclose(inFnPtr);

    return 0;
}

ここでセグメンテーション違反が発生します。

extractionResult = lzo1x_decompress(out, LZO_OUT_CHUNK, in, &new_length, NULL);

セグメンテーション違反を引き起こしているこのアプローチの何が問題なのですか?

今回はコードを省略していないことを願っています。さらに情報を追加する必要がある場合は、お気軽にお知らせください。アドバイスをいただきありがとうございます。

score 2 · Accepted Answer

あなたは独立したブロックを個別に圧縮しています。LZO デコンプレッサは圧縮データのバイト長を必要とします。これは、EOF マーカーをデコードした時点で、すべての入力バイトを消費したかどうかを確認する (消費していない場合はエラーを返す) ためです。そのため、圧縮された各チャンクの長さも一緒に保存する必要があります。したがって、より複雑なファイル形式が必要になります。例えば：

# compressing, in python-like pseudocode
# Output layout: for each input chunk of up to 65536 bytes, a 2-byte header
# (low byte first) followed by that chunk's payload.
ifile = open("data", "rb")
ofile = open("data.mylzo", "wb")
input, input_len = ifile.read(65536)
while input_len > 0:
  compressed, compressed_len = lzo1x(input, input_len)
  compressed_len -= 1 # store len-1 of next block
  if compressed_len < 65536 - 1:
    # Header holds (compressed length - 1); the compressed bytes follow.
    ofile.write(compressed_len & 255) # be sure of endianness in file formats!
    ofile.write(compressed_len >> 8)
    ofile.write(compressed)
  else:
    # Header 255,255 (value 65535) is the marker for a block stored raw.
    ofile.write(255) # incompressible block stored as-is (saves space & time).
    ofile.write(255)
    ofile.write(input)
  input, input_len = ifile.read(65536)
ofile.close()
ifile.close()

# decompressing, in python-like pseudocode
# Reads the stream block by block: a 2-byte header (low byte first), then the
# block's payload. Stops when a full 2-byte header can no longer be read.
ifile = open("data.mylzo", "rb")
ofile = open("data", "wb")
compressed_len_s = ifile.read(2)
while len(compressed_len_s) == 2:
  # The header stores (length - 1), so add 1 back after combining the bytes.
  compressed_len = (compressed_len_s[0] | (compressed_len_s[1] << 8)) + 1
  if compressed_len == 65536:
    # Header value 65535 marks a block that was stored uncompressed.
    ofile.write(ifile.read(65536)) # this can be done without copying
  else:
    compressed = ifile.read(compressed_len)
    decompressed = lzo1x_decompress(compressed, compressed_len)
    ofile.write(decompressed)
  compressed_len_s = ifile.read(2)
ofile.close()
ifile.close()

前のチャンクを順に読み飛ばすことなく任意のチャンクを解凍できるようにしたい場合 (並列での解凍、またはランダムアクセスのため)、圧縮された各チャンクの長さをファイルの先頭、つまり最初のチャンクの前にまとめて配置する必要があります。さらに、その長さの一覧の前にチャンクの数を置きます。

最後のチャンクは 64k より短くなることがあり、圧縮できない場合もあります。しかし、そのまま保存されるのは完全な 64k ブロックだけなので、圧縮後のほうが非圧縮の形式より長くなっても、圧縮した形式で保存します。そのため、ファイル全体が 64k より短く圧縮も効かない場合は、出力が元のファイルより大きくなります。

score 1 · Accepted Answer

提示されたコードはコンパイルできません（#define に余分な = がある、さまざまな箇所で inFnPtr ではなく inFilePtr になっている、など）。しかし：

圧縮する際、fread() が返す実際のデータ量が考慮されていません。これは LZO_IN_CHUNK よりも少ない可能性があります。
```
compressionResult = lzo1x_1_compress(in, LZO_IN_CHUNK, out, &out_len, wrkmem);
```
は、おそらく次のようにすべきです
```
compressionResult = lzo1x_1_compress(in, inResult, out, &out_len, wrkmem);
```
(これが今回の問題の原因である可能性は低いですが、ファイルの末尾に余計なゴミデータが付加されてしまいます。)
解凍時にも同様の問題があり、さらに in / out 引数が逆になっています。これがセグメンテーション違反の原因である可能性が高いです。
```
extractionResult = lzo1x_decompress(out, LZO_OUT_CHUNK, in, &new_length, NULL);
```
は、おそらく次のようにすべきです
```
extractionResult = lzo1x_decompress(in, inResult, out, &new_length, NULL);
```

c - LZO をファイル ストリームで動作させるにはどうすればよいですか?

3 に答える 3

Related

Reference

c - LZO をファイルストリームで動作させるにはどうすればよいですか?