1 つのアプローチは、指定された決定論的オートマトンによって受け入れられる正しい文字数の単語からランダムに一様にサンプリングすることです。このアルゴリズムは、オートマトンの状態と残りのシンボル数に関する動的プログラムです。20 個の a と 20 個の b を使用したサンプル出力を次に示します。
abbaabbbaabbbabaaabbaaababbaaabbbabbbaaa
bbaaababababbaaabbababababbbabaaababaabb
bbababbbaaabaaabbbabaabaaabbbaababbababa
ababbbabbababbbaabababbaababaabbaaababaa
bbaaababbababbabaabbababaabababaabababba
bbaabbababbbaabbababaaabaababbbaababaaab
babaabaabbababbababbababbaababaaababbaba
aaabababaababbabbababbbaabbababaabbaaabb
babababbabaaababababababaababbbaabbaabba
bbabaabababababbabaababaababbbaabbabaaba
これらを生成した Python は次のとおりです。
from collections import namedtuple
from itertools import product, repeat
from random import random
"""
deterministic finite automata
delta is a dict from state-symbol pairs to states
q0 is the initial state
F is the set of accepting states
"""
DFA = namedtuple('DFA', ('delta', 'q0', 'F'))
"""accepts strings with no runs of length 4"""
noruns4 = DFA(
delta={
('0', 'a'): '1a',
('0', 'b'): '1b',
('1a', 'a'): '2a',
('1a', 'b'): '1b',
('1b', 'a'): '1a',
('1b', 'b'): '2b',
('2a', 'a'): '3a',
('2a', 'b'): '1b',
('2b', 'a'): '1a',
('2b', 'b'): '3b',
('3a', 'a'): '4',
('3a', 'b'): '1b',
('3b', 'a'): '1a',
('3b', 'b'): '4',
('4', 'a'): '4',
('4', 'b'): '4'},
q0='0',
F={'0', '1a', '1b', '2a', '2b', '3a', '3b'})
def accepts(dfa, s):
"""returns whether dfa accepts s"""
q = dfa.q0
for c in s:
q = dfa.delta[(q, c)]
return q in dfa.F
def testaccepts():
for n in range(10):
for cs in product(*repeat('ab', n)):
s = ''.join(cs)
if not accepts(noruns4, s) != ('aaaa' in s or 'bbbb' in s):
print(s)
assert False
testaccepts()
def acceptedstrcnts(dfa, syms, cnts, memo=None, q=None):
"""
counts the number of strings accepted by dfa,
subject to the constraint of having the specified number of symbols
"""
if memo is None:
memo = {}
if q is None:
q = dfa.q0
key = (q,) + tuple(cnts)
if key not in memo:
if sum(cnts) > 0:
total = 0
for (i, cnt) in enumerate(cnts):
if cnt > 0:
newcnts = list(cnts)
newcnts[i] -= 1
newq = dfa.delta[(q, syms[i])]
total += acceptedstrcnts(dfa, syms, newcnts, memo, newq)
else:
total = 1.0 if q in dfa.F else 0.0
memo[key] = total
return memo[key]
print(acceptedstrcnts(noruns4, 'ab', (125, 50)))
memo = {}
acceptedstrcnts(noruns4, 'ab', (4, 4), memo)
# 62 strings with 4 a's, 4 b's, and no runs
print(memo)
def memoget(memo, q, cnts):
return memo[(q,) + tuple(cnts)]
def samplestrcnts(dfa, syms, cnts, memo):
"""
uses the memoization dict to sample the counted words
modulo roundoff error, the sampling is uniform
"""
cnts = list(cnts)
cs = []
q = dfa.q0
while sum(cnts) > 0:
denom = memoget(memo, q, cnts)
outcome = random()
j = None
for (i, cnt) in enumerate(cnts):
if cnt > 0:
j = i # default in case roundoff bites us
newcnts = list(cnts)
newcnts[i] -= 1
newq = dfa.delta[(q, syms[i])]
numer = memoget(memo, newq, newcnts)
ratio = numer / denom
if outcome < ratio:
break
outcome -= ratio
cnts[j] -= 1
cs.append(syms[j])
q = dfa.delta[(q, syms[j])]
return ''.join(cs)
acceptedstrcnts(noruns4, 'ab', (20, 20), memo)
for k in range(10):
print(samplestrcnts(noruns4, 'ab', (20, 20), memo))