これは醜い解決策です:
def _collect_prefixes(s, out):
    """Append the greedily extracted pieces of *s* to the list *out*.

    Each round starts with the first character as the candidate and keeps
    extending the prefix for as long as the prefix also occurs in the rest
    of the string.  At the first prefix length that no longer repeats, the
    last candidate is recorded, every occurrence of it is removed from the
    string, and the search restarts on what is left.  If no prefix length
    fails (this includes strings of one or two characters, where there is
    nothing to try), the whole remaining string is recorded as one piece.
    """
    # Iterative on purpose: one stack frame per piece would exceed the
    # interpreter's recursion limit on inputs that split into many pieces.
    while s:
        candidate = s[0]
        for i in range(1, len(s) - 1):
            if s[:i] in s[i:]:
                candidate = s[:i]
            else:
                out.append(candidate)
                s = s.replace(candidate, '')
                break
        else:
            out.append(s)
            return


def parse(s, L=None):
    """Split *s* into greedily matched substrings and count them.

    Called as ``parse(s)``, returns a dict that maps each extracted piece
    to the number of times it occurs in *s*.  Neighbouring pieces that each
    occur exactly once are joined into a single key with count 1.  An empty
    *s* yields an empty dict.

    Counts are always taken against the original *s*, so a piece that only
    came into existence after an earlier piece was removed can end up with
    a count of 0.  If the same piece is extracted more than once, the dict
    keeps a single entry for it.

    When a list is passed as *L*, the raw pieces are appended to it in
    extraction order and ``None`` is returned; no counting or merging is
    done in that mode.
    """
    is_top_level = L is None
    if not s:
        return {} if is_top_level else None
    if is_top_level:
        L = []

    _collect_prefixes(s, L)
    if not is_top_level:
        return None

    # Pair every piece with its number of occurrences in the full string.
    counted = [(piece, s.count(piece)) for piece in L]

    # Some unmatched pieces (count == 1) will be adjacent to each other;
    # fold each such run into a single entry.
    merged = []
    for piece, count in counted:
        if merged and merged[-1][1] == 1 and count == 1:
            merged[-1] = (merged[-1][0] + piece, 1)
        else:
            merged.append((piece, count))
    return dict(merged)
S = 'eg,abcgdfabc'
# Expected output: {'e': 1, 'g': 2, ',': 1, 'abc': 2, 'df': 1}
# The parenthesised form prints the same thing on Python 2 and Python 3.
print(parse(S))
もちろん、これは期待どおりには動作しません。マッチが貪欲であるため、g が 2 回一致してしまうからです ...
常に 3 文字ずつのグループに区切って処理したいのであれば、これははるかに簡単に (そしてきれいに) 書けます:
from collections import defaultdict
def parse(s, stride=3):
    """Count the consecutive, non-overlapping chunks of *s*.

    *s* is cut from left to right into pieces of *stride* characters; the
    final piece is shorter when ``len(s)`` is not a multiple of *stride*.

    Returns a ``defaultdict`` mapping each chunk to the number of times it
    was produced (missing keys read as 0).  Wrap the result in ``dict()``
    if a regular dictionary is needed.

    Raises ``ValueError`` if *stride* is smaller than 1, because a
    non-positive stride never consumes the string.
    """
    if stride < 1:
        raise ValueError("stride must be a positive integer")
    d = defaultdict(int)
    for start in range(0, len(s), stride):
        d[s[start:start + stride]] += 1
    return d