pandas を使用して PATID 値を UUID 値に置き換える
-- a.csv
PATID, ADMIT_DATE, ENC_TYPE
pat_1,2011-01-01, AV
pat_2,2012-01-01, IP
pat_3,2013-01-01, EI
-- b.csv
PATID,UUID
pat_1, uuid_xyz
pat_2, uuid_123
pat_3, uuid_abc
-- replace.py
import pandas as pd
# Input records, the PATID -> UUID linkage table, and the output destination.
source_file, linkage_file, out_file = 'a.csv', 'b.csv', 'c.csv'
def get_linked_frame(source_file, linkage_file):
    """Return the source records with PATID replaced by the linked UUID.

    Both files are read as comma-separated text with every column kept as
    ``object`` dtype and whitespace after delimiters skipped, then
    inner-joined on the ``PATID`` column.

    Args:
        source_file: Path of the CSV holding the records; must contain a
            ``PATID`` column.
        linkage_file: Path of the CSV mapping ``PATID`` to ``UUID``.

    Returns:
        A DataFrame with the source columns in which ``PATID`` holds the
        value of the matching ``UUID`` and the ``UUID`` column is removed.
        Source rows whose ``PATID`` has no linkage entry are not included;
        a ``UserWarning`` reports how many were left out.

    Raises:
        KeyError: If ``PATID`` is missing from either file, or ``UUID`` is
            missing from the merged result.
    """
    import warnings

    # Identical parsing options for both inputs, so keep them in one place.
    read_options = dict(sep=',', dtype=object, skipinitialspace=True)
    df_source = pd.read_csv(source_file, **read_options)
    df_linkage = pd.read_csv(linkage_file, **read_options)

    df = pd.merge(df_source, df_linkage, on='PATID')

    # The inner join silently discards source rows without a linkage entry;
    # surface that instead of letting records disappear unnoticed.
    unmatched = int((~df_source['PATID'].isin(df_linkage['PATID'])).sum())
    if unmatched:
        warnings.warn(
            "{} source row(s) have no PATID match in the linkage file "
            "and were dropped".format(unmatched),
            stacklevel=2,
        )

    print("Source frame: \n{}".format(df_source))

    # Overwrite the identifier with its UUID, then discard the helper column.
    df['PATID'] = df['UUID']
    return df.drop('UUID', axis=1)
# Build the re-keyed frame, write it out tab-separated without the index
# column, then echo the result to stdout.
df = get_linked_frame(source_file=source_file, linkage_file=linkage_file)
df.to_csv(out_file, index=False, sep='\t')
print("Output frame: \n{}".format(df))
-- c.csv
PATID ADMIT_DATE ENC_TYPE
uuid_xyz 2011-01-01 AV
uuid_123 2012-01-01 IP
uuid_abc 2013-01-01 EI