私はプログラミングの初心者で、Python を使用しています。現時点では、Jurafsky と Martin の 2008 年の Speech and Language Processing に関する本 (構文解析に関する演習 13.1) からいくつかのコードを理解しようとしています。以下にコピーします (最後の 4 行を除いて、このコードは自分で書いたものではありません)。
私の質問は非常に単純です。文法規則を出力する代わりに、次のような出力が得られます。
set([<__main__.Rule object at 0x011E1810>, <__main__.Rule object at 0x011E1790>, <__main__.Rule object at 0x011E15F0>, ...)
str (自己)を使用して何かを行う必要があることはわかっていますが、いくつか試してみましたが、まだ正常な出力が得られません。解決策は非常に簡単だと思いますが、何をすべきかわかりません。どんな助けでも大歓迎です。おそらく、何が機能していないかを確認するために、以下のすべてのコードを読んで理解する必要はありません。
どうもありがとう!
def chomsky_normal_form(grammar):
grammar = set(grammar)
nonterminals = set(rule.head for rule in grammar)
# remove single symbol nonterminal rules
for rule, symbol in _unary_rules(grammar, nonterminals):
grammar.discard(rule)
for rule2 in _rules_headed_by(grammar, symbol):
grammar.add(Rule(rule.head, tuple(rule2.symbols)))
if all(symbol not in rule.symbols for rule in grammar):
for rule2 in _rules_headed_by(grammar, symbol):
grammar.discard(rule2)
# move terminals to their own rules
for rule in list(grammar):
if len(rule.symbols) >= 2:
for i, symbol in enumerate(rule.symbols):
if all(rule.head != symbol for rule in grammar):
rule = _new_symbol(grammar, rule, i, i + 1)
# ensure there are only two nonterminals per rule
for rule in _multi_symbol_rules(grammar):
_new_symbol(grammar, rule, 0, 2)
# return the grammar in CNF
return grammar
# find A -> B rules, allowing concurrent modifications
def _unary_rules(grammar, nonterminals):
while True:
g = ((rule, rule.symbols[0])
for rule in grammar
if len(rule.symbols) == 1
if rule.symbols[0] in nonterminals)
yield g.next()
# find all rules headed by the given symbol
def _rules_headed_by(grammar, symbol):
return [rule for rule in grammar if rule.head == symbol]
# create a new symbol which derives the given span of symbols
def _new_symbol(grammar, rule, start, stop):
symbols = rule.symbols
new_head = '_'.join(symbols[start:stop]).upper()
new_symbols = symbols[:start] + (new_head,) + symbols[stop:]
new_rule = Rule(rule.head, new_symbols)
grammar.discard(rule)
grammar.add(new_rule)
grammar.add(Rule(new_head, symbols[start:stop]))
return new_rule
# find A -> BCD... rules, allowing concurrent modifications
def _multi_symbol_rules(grammar):
while True:
g = (rule for rule in grammar if len(rule.symbols) >= 3)
yield g.next()
# representation of a rule A -> B...C
class Rule(object):
def __init__(self, head, symbols):
self.head = head
self.symbols = symbols
self._key = head, symbols
def __eq__(self, other):
return self._key == other._key
def __hash__(self):
return hash(self._key)
def __str__(self):
rep = grammar_cnf
return rep
# build a grammar from a string of lines like "X -> YZ | b"
def get_grammar(string):
grammar = set()
for line in string.splitlines():
head, symbols_str = line.split(' -> ')
for symbols_str in symbols_str.split(' | '):
symbols = tuple(symbols_str.split())
grammar.add(Rule(head, symbols))
return grammar
grammar = get_grammar("""S -> NP VP | Aux NP VP | VP
NP -> Pronoun | Proper-Noun | Det Nominal
Nominal -> Noun | Nominal Noun | Nominal PP
VP -> Verb | Verb NP | Verb NP PP | Verb PP | VP PP
PP -> Preposition NP
Det -> that | this | a
Noun -> book | flight | meal | money
Verb -> book | include | prefer
Pronoun -> I | she | me
Proper-Noun -> Houston | TWA
Aux -> does
Preposition -> from | to | on | near | through""")
grammar_cnf = chomsky_normal_form(grammar)
print(grammar_cnf)