残念ながら、Excel は要素の値はインポートしますが、要素に付いている属性はインポートしないことが以前に分かっています。たとえば、属性値が stuff111、要素の値が stuff211 である要素の場合、stuff211 はインポートされますが、stuff111 はインポートされません。これは Excel のこの機能の仕様上の制限だと思います。インポートは Excel で行う必要がありますか？ それとも Python などのプログラミング言語を使ってもかまいませんか？ 以前、xml ファイルから特定の要素と属性の値を抽出するスクリプトを書いたことがあるので、必要であれば明日探し出して共有します。
アップデート
これは、xml ファイルからデータを抽出して csv ファイルに書き出すために以前作成して使用していた python スクリプトです。当初の目的はファイルから特定のデータを取得することだけだったので、すべての属性と要素を取得する機能はまだ実装していないことに注意してください。グローバル リスト search_items を、検索したい項目に合わせて編集する必要があります。
xml ファイルのパスを 1 つの引数として指定してコマンド ラインからスクリプトを呼び出すことも、引数なしで実行することもできます。引数なしの場合は、ファイルを選択するように求められます。ご不明な点がございましたら、お気軽にお問い合わせください。
#!/usr/bin/python
# Change Ideas
# ------------
# Add option to get all xml elements / attributes
# Add support for json?
import csv
import os
import sys
import traceback
import Tkinter
import tkFileDialog as fd
# Create the Tk root explicitly and hide it straight away so that no empty
# Tk window appears; Tk is only needed here for the file-selection dialog.
root = Tkinter.Tk()
root.withdraw()
# ---------------------------------------------------------------------------
# Module-level settings
# ---------------------------------------------------------------------------

# When True, the parsing functions print a verbose trace of what they find.
debug_on = False

# Placeholders for options that are not implemented yet:
#get_all_elements = False
#get_all_attributes = False

# search_items lists the values to pull out of the xml file and must be
# edited before each run.  Every entry is a list of the form
#
#     [exact_search_text, item_type, column_heading]
#
# where column_heading is optional.  The search text is case sensitive.
#
############################## E X A M P L E ##############################
search_items = [
    ["policyno=",                    "xml_attribute", "Policy No"],
    ["transid=",                     "xml_attribute", "Trans ID"],
    ["policyPremium=",               "xml_attribute", "Pol Prem"],
    ["outstandingBalance=",          "xml_attribute", "Balance"],
    ["APRCharge=",                   "xml_attribute", "APR Chrg"],
    ["PayByAnnualDD=",               "xml_attribute", "Annual DD"],
    ["PayByDD=",                     "xml_attribute", "Pay by DD"],
    ["mtaDebitAmount=",              "xml_attribute", "MTA Amt"],
    ["paymentMethod=",               "xml_attribute", "Pmt Meth"],
    ["ddFirstPaymentAmount=",        "xml_attribute", "1st Amt"],
    ["ddRemainingPaymentsAmount=",   "xml_attribute", "Other Amt"],
    ["ddNumberOfPaymentsRemaining=", "xml_attribute", "Instl Rem"],
]

# The item_type values that a search item may use.
item_types = ["xml_attribute", "xml_element"]
def get_heads():
    """Return the CSV column headings, one per entry in ``search_items``.

    Each search item is ``[search_text, item_type, heading]`` where the
    heading is optional.  The heading is used when it is present and
    non-empty; otherwise the search text itself becomes the column heading.

    Returns:
        A list of headings in the same order as ``search_items``.
    """
    heads = []
    for item in search_items:
        # Explicit check rather than assert + bare except: asserts are
        # stripped under ``python -O``, which would let a missing heading
        # raise IndexError and an empty heading through as a column title.
        heading = item[2] if len(item) > 2 else None
        heads.append(heading if heading else item[0])
    return heads
def write_csv_file(path, heads, data):
    """Write ``data`` to a CSV file at ``path``.

    Args:
        path: Destination file path; an existing file is overwritten.
        heads: Sequence of column headings written as the first row, or
            None (or empty) if no header row is required.
        data: Iterable of rows, each a sequence of values.

    A row that cannot be written is reported on stdout together with its
    traceback and skipped; the remaining rows are still written.
    """
    # The csv module needs a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        fileout = open(path, 'w', newline='')
    else:
        fileout = open(path, 'wb')
    with fileout:
        writer = csv.writer(fileout)
        if heads:
            writer.writerow(heads)
        for row in data:
            try:
                writer.writerow(row)
            except Exception:
                # Best effort: report the bad row and carry on with the rest.
                print('...row failed in write to file: %s' % (row,))
                for line in traceback.format_exception(*sys.exc_info()):
                    print('!! %s' % line)
    print('Data written to: %s \n' % path)
def find_val_in_line(item, item_type, line):
    """Extract the value that follows ``item`` in ``line``.

    Args:
        item: Exact, case-sensitive search text, e.g. ``'policyno='``.
        item_type: ``'xml_attribute'`` (the value is the text between the
            next pair of double quotes) or ``'xml_element'`` (the value is
            the text between the next ``>`` and the following ``<``).
            Compared case-insensitively.
        line: The line of text to search.

    Returns:
        The extracted string.  None is returned when ``item_type`` is not
        supported, when ``item`` does not occur in ``line``, or when no
        opening boundary follows it.  If the closing boundary is missing,
        the rest of the line (without its line ending) is returned.
    """
    kind = item_type.lower()
    if kind == 'xml_element':
        print('Testing still in progress for xml elements, please check output carefully')
        opener, closer = '>', '<'
    elif kind == 'xml_attribute':
        opener = closer = '"'
    else:
        print('This program does not currently support type: %s' % (item_type,))
        print('Returning null')
        return None

    start = line.find(item)  # starting point of the search text
    if start == -1:
        return None
    open_at = line.find(opener, start + 1)  # next boundary after the item
    if open_at == -1:
        return None
    begin = open_at + len(opener)
    end = line.find(closer, begin)  # boundary that ends the value
    if end == -1:
        # Value is not terminated on this line; keep what is there.
        return line[begin:].rstrip('\r\n')
    return line[begin:end]
def find_vals_in_file(file_path):
    """Scan a file line by line and collect rows of ``search_items`` values.

    Every line is checked against each entry of ``search_items``; a match is
    passed to ``find_val_in_line`` and the result is stored in the column
    that corresponds to the item's position in ``search_items``.  Once every
    column of the current row has a value the row is added to the result and
    a new row is started.  A repeated item overwrites the value already held
    for that column.  A row that is still incomplete at the end of the file
    is discarded.

    Args:
        file_path: Path of the text/xml file to read.

    Returns:
        A list of rows; each row is a list with one value per search item,
        in ``search_items`` order.
    """
    width = len(search_items)
    data = []
    # Fixed-width row plus a set of filled columns: this keeps each value in
    # its own column even when items are missing or appear out of order.
    row = [None] * width
    filled = set()
    if debug_on:
        print('\nsearch_items set to:\n' + str(search_items) + '\n')
    with open(file_path, "r") as f:
        # loop through the lines in the file...
        for line in f:
            if debug_on:
                print('\n..line set to:\n ' + line)
            # loop through search items on each line...
            for pos, item in enumerate(search_items):
                if debug_on:
                    print('...item set to:\n ' + item[0])
                if item[0] not in line:
                    continue
                val = find_val_in_line(item[0], item[1], line)
                is_repeat = pos in filled
                row[pos] = val
                filled.add(pos)
                if debug_on:
                    if is_repeat:
                        print('.....repeat item found:- %s:%s' % (item[0], val))
                    else:
                        print('....item found, row set to:\n ' + str(row))
                # a full row is added to data and a fresh row is started...
                if len(filled) == width:
                    if debug_on:
                        print('......row complete, appending to data\n')
                    data.append(row)
                    row = [None] * width
                    filled = set()
    return data
def main():
    """Parse one xml file and write the matched values to a CSV file.

    The file to parse is taken from the first command-line argument or, if
    none is given, chosen through a file dialog that is shown again until a
    file has been selected.  Results are written to ``tmp_matches.csv`` in
    the user's profile/home directory and, where the platform provides
    ``os.startfile``, opened with the associated application.
    """
    matches = []

    # Resolve a command-line path before changing directory so a relative
    # path still refers to the directory the script was started from.
    path = os.path.abspath(sys.argv[1]) if len(sys.argv) > 1 else None

    # 'userprofile' is only set on Windows; fall back to the home directory.
    os.chdir(os.getenv('userprofile') or os.path.expanduser('~'))

    while not path:
        try:
            print('Please select a file to be parsed...')
            path = fd.askopenfilename()
        except Exception:
            # Not a bare except, so Ctrl+C can still break out of the loop.
            print('Error selecting file, please try again.')

    # search for string in each file...
    try:
        matches = find_vals_in_file(path)
    except Exception:
        lines = traceback.format_exception(*sys.exc_info())
        print("An error occurred checking file: %s" % path)
        print(''.join('!! ' + line for line in lines))

    # write output to file...
    if not matches:
        print("No matches were found in the files reviewed.")
        return

    heads = get_heads()
    output_path = os.path.join(os.getcwd(), 'tmp_matches.csv')
    write_csv_file(output_path, heads, matches)
    print("Please rename the file if you wish to keep it as it will be overwritten...")
    # os.startfile only exists on Windows.
    if hasattr(os, 'startfile'):
        print("\nOpening file...")
        os.startfile(output_path)
    if debug_on:
        print("\nWriting output to screen\n" + "-" * 24)
        print(heads)
        for row in matches:
            print(row)
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
これがうまくいくことを願っています。これまでのところ、いくつかの異なる xml ファイルでテストしただけですが、問題なく動作しました。