python - Python バイナリデータの読み取り

Question

urllib2 リクエストは、次のようなバイナリレスポンスを受け取ります。

00 00 00 01 00 04 41 4D 54 44 00 00 00 00 02 41
97 33 33 41 99 5C 29 41 90 3D 71 41 91 D7 0A 47
0F C6 14 00 00 01 16 6A E0 68 80 41 93 B4 05 41
97 1E B8 41 90 7A E1 41 96 8F 57 46 E6 2E 80 00
00 01 16 7A 53 7C 80 FF FF

その構造は次のとおりです。

DATA, TYPE, DESCRIPTION
 
00 00 00 01, 4 bytes, Symbol Count =1
 
00 04, 2 bytes, Symbol Length = 4
 
41 4D 54 44, 6 bytes, Symbol = AMTD
 
00, 1 byte, Error code = 0 (OK)
 
00 00 00 02, 4 bytes, Bar Count =  2
 
FIRST BAR
 
41 97 33 33, 4 bytes, Close = 18.90
 
41 99 5C 29, 4 bytes, High = 19.17
 
41 90 3D 71, 4 bytes, Low = 18.03
 
41 91 D7 0A, 4 bytes, Open = 18.23
 
47 0F C6 14, 4 bytes, Volume = 3,680,608
 
00 00 01 16 6A E0 68 80, 8 bytes, Timestamp = November 23,2007
 
SECOND BAR
 
41 93 B4 05, 4 bytes, Close = 18.4629
 
41 97 1E B8, 4 bytes, High = 18.89
 
41 90 7A E1, 4 bytes, Low = 18.06
 
41 96 8F 57, 4 bytes, Open = 18.82
 
46 E6 2E 80, 4 bytes, Volume = 2,946,325
 
00 00 01 16 7A 53 7C 80, 8 bytes, Timestamp = November 26,2007
 
TERMINATOR
 
FF FF, 2 bytes,

このようなバイナリデータを読み取る方法は?

前もって感謝します。

アップデート：

次のコードを使用して、最初の 6 バイトで構造体モジュールを試しました。

struct.unpack('ih', response.read(6))

(16777216、1024)

しかし、それは (1, 4) を出力するはずです。説明書を見ましたが、何が悪いのかわかりません。

score 10 · Accepted Answer

だから、これがあなたが提供しているデータを解釈する上での私のベストショットです...：

import datetime
import struct

class Printable(object):
  specials = ()
  def __str__(self):
    resultlines = []
    for pair in self.__dict__.items():
      if pair[0] in self.specials: continue
      resultlines.append('%10s %s' % pair)
    return '\n'.join(resultlines)

head_fmt = '>IH6sBH'
head_struct = struct.Struct(head_fmt)
class Header(Printable):
  specials = ('bars',)
  def __init__(self, symbol_count, symbol_length,
               symbol, error_code, bar_count):
    self.__dict__.update(locals())
    self.bars = []
    del self.self

bar_fmt = '>5fQ'
bar_struct = struct.Struct(bar_fmt)
class Bar(Printable):
  specials = ('header',)
  def __init__(self, header, close, high, low,
               open, volume, timestamp):
    self.__dict__.update(locals())
    self.header.bars.append(self)
    del self.self
    self.timestamp /= 1000.0
    self.timestamp = datetime.date.fromtimestamp(self.timestamp)

def showdata(data):
  terminator = '\xff' * 2
  assert data[-2:] == terminator
  head_data = head_struct.unpack(data[:head_struct.size])
  try:
    assert head_data[4] * bar_struct.size + head_struct.size == \
           len(data) - len(terminator)
  except AssertionError:
    print 'data length is %d' % len(data)
    print 'head struct size is %d' % head_struct.size
    print 'bar struct size is %d' % bar_struct.size
    print 'number of bars is %d' % head_data[4]
    print 'head data:', head_data
    print 'terminator:', terminator
    print 'so, something is wrong, since',
    print head_data[4] * bar_struct.size + head_struct.size, '!=',
    print len(data) - len(terminator)
    raise

  head = Header(*head_data)
  for i in range(head.bar_count):
    bar_substr = data[head_struct.size + i * bar_struct.size:
                      head_struct.size + (i+1) * bar_struct.size]
    bar_data = bar_struct.unpack(bar_substr)
    Bar(head, *bar_data)
  assert len(head.bars) == head.bar_count
  print head
  for i, x in enumerate(head.bars):
    print 'Bar #%s' % i
    print x

datas = '''
00 00 00 01 00 04 41 4D 54 44 00 00 00 00 02 41
97 33 33 41 99 5C 29 41 90 3D 71 41 91 D7 0A 47
0F C6 14 00 00 01 16 6A E0 68 80 41 93 B4 05 41
97 1E B8 41 90 7A E1 41 96 8F 57 46 E6 2E 80 00
00 01 16 7A 53 7C 80 FF FF
'''

data = ''.join(chr(int(x, 16)) for x in datas.split())
showdata(data)

これは放出します：

symbol_count 1
 bar_count 2
    symbol AMTD
error_code 0
symbol_length 4
Bar #0
    volume 36806.078125
 timestamp 2007-11-22
      high 19.1700000763
       low 18.0300006866
     close 18.8999996185
      open 18.2299995422
Bar #1
    volume 29463.25
 timestamp 2007-11-25
      high 18.8899993896
       low 18.0599994659
     close 18.4629001617
      open 18.8199901581

...これは、出力フォーマットの詳細を除いて、必要なものにかなり近いようです。お役に立てれば！-）

score 6 · Accepted Answer

>>> data
'\x00\x00\x00\x01\x00\x04AMTD\x00\x00\x00\x00\x02A\x9733A\x99\\)A\x90=qA\x91\xd7\nG\x0f\xc6\x14\x00\x00\x01\x16j\xe0h\x80A\x93\xb4\x05A\x97\x1e\xb8A\x90z\xe1A\x96\x8fWF\xe6.\x80\x00\x00\x01\x16zS|\x80\xff\xff'
>>> from struct import unpack, calcsize
>>> scount, slength = unpack("!IH", data[:6])
>>> assert scount == 1
>>> symbol, error_code = unpack("!%dsb" % slength, data[6:6+slength+1])
>>> assert error_code == 0
>>> symbol
'AMTD'
>>> bar_count = unpack("!I", data[6+slength+1:6+slength+1+4])
>>> bar_count
(2,)
>>> bar_format = "!5fQ"                                                         
>>> from collections import namedtuple
>>> Bar = namedtuple("Bar", "Close High Low Open Volume Timestamp")             
>>> b = Bar(*unpack(bar_format, data[6+slength+1+4:6+slength+1+4+calcsize(bar_format)]))
>>> b
Bar(Close=18.899999618530273, High=19.170000076293945, Low=18.030000686645508, Open=18.229999542236328, Volume=36806.078125, Timestamp=1195794000000L)
>>> import time
>>> time.ctime(b.Timestamp//1000)
'Fri Nov 23 08:00:00 2007'
>>> int(b.Volume*100 + 0.5)
3680608

score 5 · Accepted Answer

>>> struct.unpack('ih', response.read(6))
(16777216, 1024)

リトルエンディアンマシンでビッグエンディアンデータを展開しています。代わりにこれを試してください：

>>> struct.unpack('!IH', response.read(6))
(1L, 4)

これは、データをネットワーク順 (ビッグエンディアン) で考慮するように unpack に指示します。また、カウントと長さの値を負にすることはできないため、フォーマット文字列では符号なしのバリアントを使用する必要があります。

score 2 · Accepted Answer

2

struct モジュールのstruct.unpackを見てください。

于 2009-10-20T00:54:39.787 に答える

score 1 · Accepted Answer

「struct」パッケージの pack/unpack 関数を使用します。詳細はこちらhttp://docs.python.org/library/struct.html

さよなら！

score 0 · Accepted Answer

すでに述べたように、struct使用する必要があるモジュールです。

バイト順などについては、ドキュメントをお読みください。

あなたの例では、次のことを行う必要があります（データはビッグエンディアンで署名されていないため）：

>>> import struct
>>> x = '\x00\x00\x00\x01\x00\x04'
>>> struct.unpack('>IH', x)
(1, 4)

python - Python バイナリ データの読み取り

アップデート：

6 に答える 6

Related

Reference

python - Python バイナリデータの読み取り