2

優れた本 Programming Collective Intelligence で提供されている単純ベイズ分類器のコードを変更して、GAE データストアに適合させようとしています (提供されたコードは pysqlite2 を使用しています)。しかし、それをやろうとして、私はこの行で遭遇しています:

update.count = count + 1

このブロックから:

def incf(self,f,cat):
    count=self.fcount(f,cat)
    if count==0:
      fc_value = fc(feature = f, category = cat, count = 1)
      fc_value.put()
    else:
        update = db.GqlQuery("SELECT count FROM fc where feature =:feature AND category =:category", feature = f, category = cat).get()
        update.count = count + 1
        update.put()
#      self.con.execute(
 #       "update fc set count=%d where feature='%s' and category='%s'"
  #      % (count+1,f,cat))

このエラー:

  File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 221, in post
    sampletrain(nb)
  File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 206, in sampletrain
    cl.train('Nobody owns the water.','good')
  File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 144, in train
    self.incf(f,cat)
  File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 82, in incf
    update.count = count + 1
TypeError: unsupported operand type(s) for +: 'IntegerProperty' and 'int'

そして、私は理解できません:なぜ私はcount1でインクリメントできないのですか?

update = db.GqlQuery("SELECT count FROM cc where category =:category", category = cat).get() update.count = count + 1整数ではなく「IntegerProperty」なのはなぜですか?

どうすれば修正できますか?update.count = count + 1この操作を行うための正しい構文は何ですか?

ここにコード全体があります:

import os

import random

import re

import math

from google.appengine.ext import db

import webapp2

import jinja2

from jinja2 import Environment, FileSystemLoader

jinja_environment = jinja2.Environment(autoescape=True,
    loader=jinja2.FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')))

class fc(db.Model):
    feature = db.StringProperty()
    category = db.StringProperty()
    count = db.IntegerProperty()

class cc(db.Model):
    category = db.StringProperty()
    count = db.IntegerProperty()

def getfeatures(doc):
  splitter=re.compile('\\W*')
  # Split the words by non-alpha characters
  words=[s.lower() for s in splitter.split(doc)
          if len(s)>2 and len(s)<20]
  return dict([(w,1) for w in words])

class classifier:
  def __init__(self,getfeatures, filename=None):
    # Counts of feature/category combinations
    self.fc={}
    # Counts of documents in each category
    self.cc={}
    self.getfeatures=getfeatures

#  def setdb(self,dbfile):
#    self.con=sqlite.connect('db_file')
#    self.con=sqlite3.connect(":memory:")
#    self.con.execute('create table if not exists fc(feature,category,count)')
#    self.con.execute('create table if not exists cc(category,count)')

  def incf(self,f,cat):
    count=self.fcount(f,cat)
    if count==0:
      fc_value = fc(feature = f, category = cat, count = 1)
      fc_value.put()
    else:
        update = db.GqlQuery("SELECT count FROM fc where feature =:feature AND category =:category", feature = f, category = cat).get()
        update.count = count + 1
        update.put()
#      self.con.execute(
 #       "update fc set count=%d where feature='%s' and category='%s'"
  #      % (count+1,f,cat))

  def fcount(self,f,cat):
    res = db.GqlQuery("SELECT * FROM fc WHERE feature =:feature AND category =:category", feature = f, category = cat).get()
    logging.debug('This is a log message.')
#    res=self.con.execute(
#      'select count from fc where feature="%s" and category="%s"'
#      %(f,cat)).fetchone()
    if res is None: return 0
    else:
        res = fc.count
        return res
#        return float(res[0])

  def incc(self,cat):
    count=self.catcount(cat)
    if count==0:
      #  self.con.execute("insert into cc values ('%s',1)" % (cat))
      cc_value = cc(category = cat, count = 1)
      cc_value.put()
    else:
        update = db.GqlQuery("SELECT count FROM cc where category =:category", category = cat).get()
        update.count = count + 1
        update.put()
#      self.con.execute("update cc set count=%d where category='%s'"
#                       % (count+1,cat))

  def catcount(self,cat):
#    res=self.con.execute('select count from cc where category="%s"'
 #                        %(cat)).fetchone()
    res = db.GqlQuery("SELECT count FROM cc WHERE category =:category", category = cat).get()
    if res is None: return 0
#    else: return float(res[0])
    else: return float(res)

  def categories(self):
#    cur = self.con.execute('select category from cc');
    cur = db.GqlQuery("SELECT category FROM cc").fetch(999)
    return [d[0] for d in cur]

  def totalcount(self):
   # res=self.con.execute('select sum(count) from cc').fetchone();
    all_cc = db.GqlQuery("SELECT * FROM cc").fetch(999)
    res = 0
    for cc in all_cc:
        count = cc.count
        res+=count
#    res = db.GqlQuery("SELECT sum(count) FROM cc").get()
#    if res==None: return 0
    if res == 0: return 0
#    return res[0]
    return res

  def train(self,item,cat):
    features=self.getfeatures(item)
    # Increment the count for every feature with this category
    for f in features.keys():
##    for f in features:
      self.incf(f,cat)
    # Increment the count for this category
    self.incc(cat)
#    self.con.commit()

  def fprob(self,f,cat):
    if self.catcount(cat)==0: return 0
    # The total number of times this feature appeared in this
    # category divided by the total number of items in this category
    return self.fcount(f,cat)/self.catcount(cat)

  def weightedprob(self,f,cat,prf,weight=1.0,ap=0.5):
    # Calculate current probability
    basicprob=prf(f,cat)
    # Count the number of times this feature has appeared in
    # all categories
    totals=sum([self.fcount(f,c) for c in self.categories()])

    # Calculate the weighted average
    bp=((weight*ap)+(totals*basicprob))/(weight+totals)
    return bp

class naivebayes(classifier):
  def __init__(self,getfeatures):
    classifier.__init__(self, getfeatures)
    self.thresholds={}

  def docprob(self,item,cat):
    features=self.getfeatures(item)
    # Multiply the probabilities of all the features together
    p=1
    for f in features: p*=self.weightedprob(f,cat,self.fprob)
    return p

  def prob(self,item,cat):
    catprob=self.catcount(cat)/self.totalcount()
    docprob=self.docprob(item,cat)
    return docprob*catprob

  def setthreshold(self,cat,t):
    self.thresholds[cat]=t

  def getthreshold(self,cat):
    if cat not in self.thresholds: return 1.0
    return self.thresholds[cat]

  def classify(self,item,default=None):
    probs={}
    # Find the category with the highest probability
    max=0.0
    for cat in self.categories():
      probs[cat]=self.prob(item,cat)
      if probs[cat]>max:
        max=probs[cat]
        best=cat
    # Make sure the probability exceeds threshold*next best
    for cat in probs:
      if cat==best: continue
      if probs[cat]*self.getthreshold(best)>probs[best]: return default
    return best

def sampletrain(cl):
  cl.train('Nobody owns the water.','good')
  cl.train('the quick rabbit jumps fences','good')
  cl.train('buy pharmaceuticals now','bad')
  cl.train('make quick money at the online casino','bad')
  cl.train('the quick brown fox jumps','good')


class MainHandler(webapp2.RequestHandler):
    def get(self):
        template_values = {"given_sentence":'put a name here'}
        template = jinja_environment.get_template('index.html')
        self.response.out.write(template.render(template_values))

    def post(self):
        nb = naivebayes(getfeatures)
        sampletrain(nb)
        given_sentence = self.request.get("given_sentence")
        spam_result = nb.classify(given_sentence)
        submit_button = self.request.get("submit_button")
        if submit_button:
            self.redirect('/test_result?spam_result=%s&given_sentence=%s' % (spam_result, given_sentence))

class test_resultHandler(webapp2.RequestHandler):
    def get(self):
        spam_result = self.request.get("spam_result")
        given_sentence = self.request.get("given_sentence")
        test_result_values = {"spam_result": spam_result,
                             "given_sentence": given_sentence}
        template = jinja_environment.get_template('test_result.html')
        self.response.out.write(template.render(test_result_values))

app = webapp2.WSGIApplication([('/', MainHandler), ('/test_result', test_resultHandler)],
                              debug=True)
4

2 に答える 2

3

「プロパティ」は通常、クラスではなく、オブジェクト インスタンスでアクセスすることを意図しています。変更してみてください:

class fc(db.Model):
    feature = db.StringProperty()
    category = db.StringProperty()
    count = db.IntegerProperty()

次のようなものに:

class fc_class(db.Model):
    feature = db.StringProperty()
    category = db.StringProperty()
    count = db.IntegerProperty()
fc = fc_class()

(これは素晴らしい設計ではありませんが、あなたが抱えている問題を解決すると思います。つまり、プロパティオブジェクトは、クラス自体ではなく、クラスインスタンスを介してアクセスされたときにのみ特別な動作をするということです。)

于 2012-08-14T02:51:21.733 に答える
2

あなたのコードを見ると、問題はres割り当てにあるようです:

def fcount(self,f,cat):
  res = db.GqlQuery("SELECT * FROM fc WHERE feature =:feature AND category =:category", feature = f, category = cat).get()
  logging.debug('This is a log message.')
  if res is None: return 0
  else:
    res = fc.count
    return res

私が言えることから、あなたは to のバインドされていないメソッドを割り当てていますclass fcresこれは @Hamish が指摘しているように、実際にはIntegerProperty(). 代わりに (繰り返しますが、ここでは専門家ではありません)、おそらく次のような値を返したいと思うでしょう:

def fcount(self,f,cat):
  res = db.GqlQuery("SELECT * FROM fc WHERE feature =:feature AND category =:category", feature = f, category = cat).get()
  logging.debug('This is a log message.')
  if res is None: return 0
  else:
    return res.count

私はそれresがすでに Query インスタンスであると信じています (これはあなたが望むものです)。その後、count属性をプルして必要な値を取得できます。

于 2012-08-14T03:06:19.627 に答える