テキストからリストを生成する関数を作成する必要があります。
text = '^to[by, from] all ^appearances[appearance]'
list = ['to all appearances', 'to all appearance', 'by all appearances',
'by all appearance', 'from all appearances', 'from all appearance']
つまり、角かっこ内の値は、^の直後にある前の単語を置き換えることになっています。以下に示すように、関数に5つの引数が必要です...
私のコード(動作しません)
def addSubstitution(buf, substitutions, val1='[', val2=']', dsym=',', start_p="^"):
for i in range(1, len(buf), 2):
buff = []
buff.extend(buf)
if re.search('''[^{2}]+[{0}][^{1}{0}]+?[{1}]'''.format(val1, val2, start_p, buff[i]):
substrs = re.split('['+val1+']'+'|'+'['+val2+']'+'|'+dsym, buff[i])
for substr in substrs:
if substr:
buff[i] = substr
addSubstitution(buff, substitutions, val1, val2, dsym, start_p)
return
substitutions.add(''.join(buf))
pass
def getSubstitution(text, val1='[', val2=']', dsym=',', start_p="^"):
pattern = '''[^{2}]+[{0}][^{1}{0}]+?[{1}]'''.format(val1, val2, start_p)
texts = re.split(pattern,text)
opttexts = re.findall(pattern,text)
buff = []
p = iter(texts)
t = iter(opttexts)
buf = []
while True:
try:
buf.append(next(p))
buf.append(next(t))
except StopIteration:
break
substitutions = set()
addSubstitution(buf, substitutions, val1, val2, dsym, start_p)
substitutions = list(substitutions)
substitutions.sort(key=len)
return substitutions