ストップワードを使用してPythonで自動言語検出を行っています
しかし、コードをテストしようとすると KeyError が発生します。これがコードです
import nltk
from nltk.corpus import stopwords
def scoreFunction(wholetext):
dictiolist={}
scorelist={}
NLTKlanguage = ["dutch","finnish","german","italian","portuguese","spanish","turkish","danish","english"," french","hungarian","norwegian","russian","swedish"]
FREElanguages = [""]
languages= NLTKlanguages + FREElanguages
for lang in NLTKlanguages:
dictiolist[lang]=stopwords.words(lang)
tokens=nltk.tokenize.word_tokenize(wholetext)
tokens=[t.lower() for t in tokens]
freq_dist=nltk.FreqDist(tokens)
for lang in languages:
scorelist[lang]=0
for word in freq_dist.keys()[0:20]:
if word in dictiolist[lang]:
scorelist[lang]+=1
return scorelist
def whichLanguage(scorelist):
maximum=0
for item in scorelist:
value = scorelist[item]
if maximum<value:
maximum = value
lang = item
return lang
実行すると、scoreFunction("hillo my name is osfar and i'm genius") というエラーが表示されます Traceback (most recent call last): File "", line 1, in
scoreFunction("hello my name is osfar and i'm very genius")
File "C:/Users/osama1/Desktop
/fun-test", line 17, in scoreFunction
if word in dictiolist[lang]:
KeyError: ''