限られた語彙のコマンド解析に pyparsing を使ったことがあります。以下は、投稿された例に対応できるよう pyparsing の上に作った小さなフレームワークです。
from pyparsing import *
# Placeholder expressions, one per word category.  Each starts out as a
# Forward and is given an empty MatchFirst below; appendExpr() later adds one
# alternative per vocabulary word to that MatchFirst.

# Transitive verb forms.
transVerb = Forward()
transVerbPlural = Forward()
transVerbPast = Forward()
transVerbProg = Forward()

# Intransitive verb forms.
intransVerb = Forward()
intransVerbPlural = Forward()
intransVerbPast = Forward()
intransVerbProg = Forward()

# Nouns.
singNoun = Forward()
pluralNoun = Forward()
properNoun = Forward()

# Articles.
singArticle = Forward()
pluralArticle = Forward()

# Either kind of verb, in progressive or plural form.
verbProg = transVerbProg | intransVerbProg
verbPlural = transVerbPlural | intransVerbPlural

# Give every category an (initially empty) list of alternatives.
for expr in (
    transVerb, transVerbPlural, transVerbPast, transVerbProg,
    intransVerb, intransVerbPlural, intransVerbPast, intransVerbProg,
    singNoun, pluralNoun, properNoun,
    singArticle, pluralArticle,
):
    expr << MatchFirst([])
def appendExpr(e1, s):
    """Add the word *s* as one more alternative of the category *e1*.

    The word is matched with either case for its first character and must
    end at a word boundary.

    Args:
        e1: a Forward whose contained expression exposes an ``exprs`` list
            (the module initialises each category with ``MatchFirst([])``).
        s: the word to add.

    Raises:
        IndexError: if *s* is empty.
    """
    # Local import: the file's visible import block does not provide ``re``.
    import re

    c1 = s[0]
    # Escape every piece so that regex metacharacters in a vocabulary word
    # are matched literally instead of being interpreted as pattern syntax.
    pattern = r"[%s%s]%s\b" % (
        re.escape(c1.upper()),
        re.escape(c1.lower()),
        re.escape(s[1:]),
    )
    e1.expr.exprs.append(Regex(pattern))
def makeVerb(s, transitive):
    """Register the four forms of one verb.

    Args:
        s: four whitespace-separated forms, in the order plural, singular,
            past, progressive (e.g. ``"walk walks walked walking"``).
        transitive: when true the forms go to the transitive categories,
            otherwise to the intransitive ones.

    Raises:
        ValueError: if *s* does not split into exactly four words.
    """
    plural_form, singular_form, past_form, progressive_form = s.split()

    if transitive:
        categories = (transVerb, transVerbPlural, transVerbPast, transVerbProg)
    else:
        categories = (intransVerb, intransVerbPlural, intransVerbPast,
                      intransVerbProg)

    # Same pairing as the category order above: singular, plural, past, prog.
    forms = (singular_form, plural_form, past_form, progressive_form)
    for category, form in zip(categories, forms):
        appendExpr(category, form)
def makeNoun(s, proper=False):
    """Register a noun.

    Args:
        s: for a proper noun, the name itself; otherwise the singular form,
            optionally followed by whitespace and the plural form.  When no
            plural is given, ``s + "s"`` is used.
        proper: when true, *s* is added to the proper-noun category as is.
    """
    if proper:
        appendExpr(properNoun, s)
        return

    words = s.split()
    # Fallback plural; only reaches the first two slots when none was given.
    words.append(s + "s")
    singular, plural = words[:2]
    appendExpr(singNoun, singular)
    appendExpr(pluralNoun, plural)
def makeArticle(s, plural=False):
    """Register every whitespace-separated word of *s* as an article.

    Args:
        s: one or more articles separated by whitespace.
        plural: when true the words go to the plural-article category,
            otherwise to the singular-article category.
    """
    category = pluralArticle if plural else singArticle
    for word in s.split():
        appendExpr(category, word)
# --- Vocabulary -----------------------------------------------------------

# Verbs are given as "plural singular past progressive".
for _forms in (
    "disappear disappears disappeared disappearing",
    "walk walks walked walking",
):
    makeVerb(_forms, transitive=False)

for _forms in (
    "see sees saw seeing",
    "like likes liked liking",
):
    makeVerb(_forms, transitive=True)

# Common nouns: "singular [plural]"; the plural defaults to singular + "s".
for _noun in ("dog", "girl", "car", "child children"):
    makeNoun(_noun)

# Proper nouns.
for _name in ("Kim", "Jody"):
    makeNoun(_name, proper=True)

# Articles.
makeArticle("a the")
makeArticle("this every")
makeArticle("the these all some several", plural=True)

# --- Grammar --------------------------------------------------------------

# What may follow a transitive verb.
transObject = (
    singArticle + singNoun
    | properNoun
    | Optional(pluralArticle) + pluralNoun
    | verbProg
    | "to" + verbPlural
)

# Sentence with a singular subject.
sgSentence = (singArticle + singNoun | properNoun) + (
    intransVerb
    | intransVerbPast
    | (transVerb | transVerbPast) + transObject
)

# Sentence with a plural subject.
plSentence = (Optional(pluralArticle) + pluralNoun) + (
    intransVerbPlural
    | intransVerbPast
    | (transVerbPlural | transVerbPast) + transObject
)

sentence = sgSentence | plSentence
def test(s):
    """Print *s*, then either its parsed tokens or the parse error.

    Args:
        s: the sentence to parse with the module-level ``sentence`` grammar.
    """
    # Single-argument print(...) and "except ... as ..." behave the same on
    # Python 2.6+ and are also valid on Python 3, unlike the statement forms.
    print(s)
    try:
        print(sentence.parseString(s).asList())
    except ParseException as pe:
        print(pe)
# Run the grammar against a set of example sentences.
for _example in (
    "Kim likes cars",
    "The girl saw the dog",
    "The dog saw Jody",
    "Kim likes walking",
    "Every girl likes dogs",
    "All dogs like children",
    "Jody likes to walk",
    "Dogs like walking",
    "All dogs like walking",
    "Every child likes Jody",
):
    test(_example)
出力:
Kim likes cars
['Kim', 'likes', 'cars']
The girl saw the dog
['The', 'girl', 'saw', 'the', 'dog']
The dog saw Jody
['The', 'dog', 'saw', 'Jody']
Kim likes walking
['Kim', 'likes', 'walking']
Every girl likes dogs
['Every', 'girl', 'likes', 'dogs']
All dogs like children
['All', 'dogs', 'like', 'children']
Jody likes to walk
['Jody', 'likes', 'to', 'walk']
Dogs like walking
['Dogs', 'like', 'walking']
All dogs like walking
['All', 'dogs', 'like', 'walking']
Every child likes Jody
['Every', 'child', 'likes', 'Jody']
語彙を増やすと、これはおそらく遅くなります。50万エントリーですか? 実用的な語彙として妥当なのは 5,000〜6,000 語程度だと思っていました。また、処理できる文の構造はかなり制限されます。自然言語の処理こそが NLTK の目的です。