python - 正しい文字列の代わりに null バイトをファイルに書き込む

Question

データファイルを処理するPythonスクリプトがあります:

# Excerpt from the asker's script: copy every 6-field, comma-separated row
# of the raw result file into a per-`name` output file, after a header row.
# NOTE(review): `name` is not defined in this excerpt; it presumably comes
# from the surrounding (full) script.
out = open('result/process/'+name+'.res','w')
out.write("source,rssi,lqi,packetId,run,counter\n")
f = open('result/resultat0.res','r')
# The list comprehension reads the whole input into memory before the loop
# starts. Iterating a file yields each line with its terminator attached,
# so the `x != ''` filter is not expected to reject anything.
for ligne in [x for x in f if x != '']:
    chaine = ligne.rstrip('\n')
    tmp = chaine.split(',')
    # Keep only rows that have exactly six comma-separated fields.
    if (len(tmp) == 6 ):
        out.write(','.join(tmp)+"\n")
f.close()
# NOTE(review): `out` is not closed anywhere in this excerpt; confirm that
# the full script closes (or flushes) it.

完全なコードはこちら

このスクリプトを複数のコンピューターで使用していますが、動作が異なります。Python 2.6.6 を使用している 1 台目のコンピューターでは、結果は期待どおりです。しかし、他のコンピューター (Python 2.6.6、3.3.2、2.7.5) では、ファイルオブジェクトの write メソッドが、処理の大部分で期待する値の代わりに null バイトを書き込みます。次のような結果になります:

$ hexdump -C result/process/1.res
00000000  73 6f 75 72 63 65 2c 72  73 73 69 2c 6c 71 69 2c  |source,rssi,lqi,|
00000010  70 61 63 6b 65 74 49 64  2c 72 75 6e 2c 63 6f 75  |packetId,run,cou|
00000020  6e 74 65 72 0a 00 00 00  00 00 00 00 00 00 00 00  |nter............|
00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
0003a130  00 00 00 00 00 00 00 00  00 00 31 33 2c 36 35 2c  |..........13,65,|
0003a140  31 34 2c 38 2c 39 38 2c  31 33 31 34 32 0a 31 32  |14,8,98,13142.12|
0003a150  2c 34 37 2c 31 37 2c 38  2c 39 38 2c 31 33 31 34  |,47,17,8,98,1314|
0003a160  33 0a 33 2c 34 35 2c 31  38 2c 38 2c 39 38 2c 31  |3.3,45,18,8,98,1|
0003a170  33 31 34 34 0a 31 31 2c  38 2c 32 33 2c 38 2c 39  |3144.11,8,23,8,9|
0003a180  38 2c 31 33 31 34 35 0a  39 2c 32 30 2c 32 32 2c  |8,13145.9,20,22,|

この問題を解決する方法を教えてください。

score 2 · Accepted Answer

次の考慮事項があります。

10 年以上にわたって Python でプログラミングしてきましたが、global を使用する説得力のある理由に出くわしたことはありません。代わりに、引数を関数に渡してください。
終了時にファイルが確実に閉じられるようにするには、with ステートメントを使用します。

これは、健全性のためにコードをリファクタリングする (テストされていない) 試みです。特定の識別子の下にあるすべての行を保持するのに十分なメモリがあることを前提としています。

このリファクタリング後に結果ファイルに null バイトがある場合は、デバッグを続行する合理的な根拠があります。

import os
import re
from contextlib import closing

def list_files_to_process(directory='results'):
  """
  Collect the paths of the regular files directly inside `directory` whose
  name ends with '.res' (compared case-insensitively).

  Each returned path is `directory` joined with the entry name, in the
  order os.listdir() reports the entries. Sub-directories are skipped even
  when their name ends with '.res'.
  """
  return [
    os.path.join(directory, entry)
    for entry in os.listdir(directory)
    if entry.lower().endswith('.res')
    and os.path.isfile(os.path.join(directory, entry))
  ]

def group_lines(sequence):
  """
  Generator: split a sequence of lines into batches delimited by separator
  lines of the form 'A:<digits>:'.

  Line terminators ('\\n', '\\r\\n') are stripped from every line before it
  is examined, so `sequence` may be an open text file as well as a list of
  bare strings. Lines that are empty after stripping are skipped.

  Yields (batch_id, lines) tuples. batch_id is the digit string captured
  from the separator that opened the batch, or None for lines that appear
  before the first separator. lines is the list of stripped lines that
  followed that separator. A separator directly followed by another
  separator, or by the end of the input, is yielded with an empty list.
  """
  # Raw string so that '\d' reaches the regex engine unchanged.
  separator = re.compile(r'^A:(?P<id>\d+):$')
  batch = []
  batch_id = None
  for line in sequence:
    # Iterating a file yields lines with their terminator attached; without
    # this strip a blank line ('\n') would never be recognised as blank and
    # every stored line would carry a trailing newline.
    line = line.rstrip('\r\n')
    if not line: # Ignore blanks
      continue
    m = separator.match(line)
    if m is not None:
      # A new separator closes the batch collected so far (if any).
      if batch_id is not None or batch:
        yield (batch_id, batch)
      batch_id = m.group('id')
      batch = []
    else:
      batch.append(line)
  # Flush whatever was collected after the last separator.
  if batch_id is not None or batch:
    yield (batch_id, batch)

def filename_for_results(batch_id,result_directory):
  """
  Build the path of the result file that belongs to `batch_id`.

  The file is named 'results-<batch_id>.res' and is placed directly under
  `result_directory`.
  """
  basename = "results-%s.res" % (batch_id,)
  return os.path.join(result_directory, basename)

def open_result_file(filename,header="source,rssi,lqi,packetId,run,counter"):
  """
  Open `filename` for appending and return the open file object.

  When the file is missing or empty at the time of the call, `header`
  followed by a newline is written first; otherwise the existing content is
  left untouched. Closing the returned file is the caller's job.
  """
  # Decide before opening: opening in append mode creates the file.
  needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
  result_file = open(filename,'a')
  if needs_header:
    result_file.write(header + '\n')
  return result_file

def process_file(filename,result_directory='results/processed'):
  """
  Open filename and process its contents. Uses group_lines() to group
  lines into different files based upon specific line acting as a
  content separator.

  For every batch, data lines of the form 'L<six comma-separated fields>E'
  are appended, without the 'L'/'E' markers, to the batch's result file
  (see filename_for_results() and open_result_file()). Empty batches and
  lines that do not have that form are reported, one per line, in the
  'error' result file, which is rewritten on every call.

  `result_directory` is created when it does not exist yet.
  """
  # open() does not create missing directories, so make sure the target
  # exists before any result or error file is opened.
  if result_directory and not os.path.isdir(result_directory):
    os.makedirs(result_directory)
  error_filename = filename_for_results('error',result_directory)
  with open(filename,'r') as in_file, open(error_filename,'w') as error_out:
    for batch_id, lines in group_lines(in_file):
      if not lines:
        # Trailing newline keeps successive error records on separate lines.
        error_out.write("Received batch %r with 0 lines\n" % (batch_id,))
        continue
      out_filename = filename_for_results(batch_id,result_directory)
      with closing(open_result_file(out_filename)) as out_file:
        for line in lines:
          # Drop any line terminator so the 'E' end marker can be detected.
          line = line.rstrip('\r\n')
          if line.startswith('L') and line.endswith('E') and line.count(',') == 5:
            # Slice off exactly one marker at each end; lstrip/rstrip would
            # also eat data characters that happen to equal the marker.
            out_file.write(line[1:-1] + '\n')
          else:
            error_out.write("Unknown line, batch=%r: %r\n" %(batch_id,line))

if __name__ == '__main__':
  # Process every '.res' file found in the default input directory.
  files = list_files_to_process()
  for filename in files:
    # Single-argument print with parentheses behaves the same on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print("Processing %s" % (filename,))
    process_file(filename)

python - 正しい文字列の代わりに null バイトをファイルに書き込む

回答 1 件

Related

Reference