2

キーワードで区切られたヘッダーに基づいて、テキストの壁をテキストのチャンクとテキストのリストに分割しようとしています。これを行う最善の方法は再帰であると考えました。残念ながら、特定の変数の型をチェックしようとすると、次のエラーが発生します*** TypeError: 'str' object is not callable。直接呼び出しているときに、PDB で同じエラーが発生しますtype(var)。これは意味がないようで、見えないものではないかと心配しています。

これが、コードの関連セクションであると私が信じているものです。もっと見る必要があると感じたら教えてください

def separate(text,boundary = None):
    pdb.set_trace()
    if boundary == None:
        m = re.findall(r'(?<=boundary=).*',text)
        i = 0
        textList = [text]
        while i < len(m): #have all levels of Boundary/headers named
            boundary = m[i]
            textList = recursiveSplit(textList,boundary)
            i += 1

    return textList

def recursiveSplit(chunk,boundary):
    if type(chunk) is types.ListType: #error occurs here
        for object in chunk:
            recursiveSplit(object,boundary)
    if type(chunk) is types.StringType:
        list = re.split(r'(?P<boundary>)(?!--)',chunk)
        return list
    return None

完全なコード。テキストファイルが必要です。任意の MIME メールを使用できます。テスト用に使用しているメールもアップロードします

    #Textbasics email parser
#based on a "show original" file converted into text

from sys import argv
import re, os, pdb, types

script, filename = argv
text = open(filename).read()
type = "text only" #Set the default type of email

#cut the email up by sections
#--A section is defined as any time there are two line breaks in a row
textList = re.split(r"\n\n", text)
header = textList[0]
if re.search(r'MIME-Version',header):
    type = "MIME"

# If mail has no attachments, parse as a text-only email
class Parser(object):

    def __init__(self,textList):
        a = 1
        self.body = ""
        self.textList = textList
        self.header = textList[0]
        while a < len(textList):
            self.body = self.body + textList[a] + '\n\n'
            a += 1

        m = re.search(r'(?<=Subject: ).*', self.header)
        self.subject = m.group(0)

        m = re.search(r'(?<=From: ).*', self.header)
        self.fromVar = m.group(0)

        m = re.search(r'(?<=To: ).*', self.header)
        self.toVar = m.group(0)

        m = re.search(r'(?<=Date: )\w+\s\w+\s\w+', self.header)
        self.date = m.group(0)

    def returnParsed(self,descriptor = "all"):
        if descriptor == "all":
            retv = "Subject: " + self.subject + "\n" + "From: " + self.fromVar + "\n" + "To: " + self.toVar + "\n" + "Date: " + self.date + "\n" + "\n" + self.body
            return retv

        if descriptor == "subject":
            return self.subject
        if descriptor == "fromVar":
            return self.fromVar
        if descriptor == "toVar":
            return self.toVar
        if descriptor == "date":
            return self.date
        if descriptor == "body":
            return self.body

class MIMEParser(Parser):

    class MIMEDataDecoder(object):
        def __init__(self,decodeString,type):
            pass    


    def __init__(self,textList):
        self.textList = textList
        self.nestedItems = []
        newItem = NestedItem(self)
        newItem.setContentType("Header")
        newItem.setValue(self.textList[0])
        self.nestedItems.append(newItem)
        if re.search(r'(boundary=)',newItem.value):
            helperItem = NestedItem(self)
            helperItem.value = (self.textList[0])
            m = re.search(r'(?<=Content-Type: ).+(?=;)',newItem.value)
            helperItem.setContentType(m.group(0))
            self.nestedItems.append(helperItem)

        self.organizeData()   
        """i = 0
        while i < len(self.textList):
            newItem = NestedItem(self)
            ct = self.nextContentType
            newItem.setContentType(ct)
            newItem.setValue(self.textList[i])
            self.nestedItems.append(newItem)
            m = re.search(r'(?<=Content-Type: ).+(?=;)',self.textList[i])
            if m:
                self.nextContentType = m.group(0)
            i += 1
            """

    def nestItem (self,item):
        self.nestedItems.append(item)

    def organizeData(self):
        self.nestLevel = 1
        self.currentSuper = self
        m = re.search(r'(?<=boundary=).*',self.textList[0])
        self.currentBoundary = m.group(0)
        self.currentList = self.textList
        self.currentList.remove(self.textList[0])
        self.formerObjectDatabase = {}
        pdb.set_trace()
        while self.nestLevel > 0:
            i = 0
            while i < len(self.currentList):

                boundary = self.currentBoundary
                #If block is a "normal block", containing a current boundary identifier
                p = re.search(r'--(?P<boundary>)(?!--)', text)
                if p:
                    newItem = NestedItem(self.currentSuper)
                    newItem.setValue(self.currentList[i])
                    r = re.search(r'(?<=Content-Type: ).+(?=;)',newItem.value)
                    if r:
                        newItem.setContentType(r.group(0))
                    self.currentObject = newItem
                    self.currentSuper.nestItem(self.currentObject)
                #If the block contains a new block boundary
                m = re.search(r'(?<=boundary=).*',self.currentList[i])
                if m:
                    #begin new layer of recursive commands
                    newFormerObject = self.FormerCurrentObject(self.currentList,self.currentSuper,self.currentBoundary)
                    self.formerObjectDatabase[self.nestLevel] = newFormerObject
                    self.currentSuper = self.currentObject
                    self.nestLevel += 1
                    self.currentBoundary = m.group(0)
                    boundary = self.currentBoundary
                    #self.currentList = re.split(r'--(?P<boundary>)(?!--)', self.currentList[i])
                boundary = self.currentBoundary
                #If block contains an "end of boundary" marker
                q = re.search(r'(?P<boundary>)--', text)
                if q:
                    self.nestLevel -= 1
                    currentObject = self.formerObjectDatabase[self.nestLevel]
                    self.currentList = currentObject.formerList
                    self.currentSuper = currentObject.formerSuper
                    self.currentBoundary = currentObject.formerBoundary
                i += 1                    


    class FormerCurrentObject:
        def __init__(self,formerList,formerSuper,formerBoundary):
            self.formerList = formerList
            self.formerSuper = formerSuper
            self.formerBoundary = formerBoundary




    def printAll(self):
        print "printing all: %d" % len(self.nestedItems)
        i = 0
        while i < len(self.nestedItems):
            print "printing out item %d" % i
            self.nestedItems[i].printOut()
            i += 1

class NestedItem(object):
    def __init__(self,superObject,contentType=" ",value = " "):
        self.superObject = superObject
        self.contentType = contentType
        self.value = value
        self.nestedItems = []

    def nestItem(self,item):
        self.nestedItems.append(item)

    def printOut(self,printBuffer = ""):
        print printBuffer + '++%s' % self.contentType
        print printBuffer + self.value
        a = 0
        printBuffer = printBuffer + "  "
        while a < len(self.nestedItems):
            self.nestedItems[a].printOut(printBuffer)

    def setContentType(self,contentType):
        self.contentType = contentType

    def setValue(self,value):
        self.value = value



if type == "text only":
    p = Parser(textList)
    print p.returnParsed()

def separate(text,boundary = None):
    pdb.set_trace()
    if boundary == None:
        m = re.findall(r'(?<=boundary=).*',text)
        i = 0
        textList = [text]
        while i < len(m): #have all levels of Boundary/headers named
            boundary = m[i]
            textList = recursiveSplit(textList,boundary)
            i += 1

    return textList

def recursiveSplit(chunk,boundary):
    if type(chunk) is types.ListType: #<<--error occurs here
        for obj in chunk:
            recursiveSplit(obj,boundary)
    if type(chunk) is types.StringType:
        list = re.split(r'(?P<boundary>)(?!--)',chunk)
        return list
    return None


if type == "MIME":
    #separate the text file instead by its boundary identifier
    p = MIMEParser(separate(text))
    p.printAll()
4

1 に答える 1

13

タイプする文字列を割り当てています:

type = "text only"

そしてそれを呼び出します

if type(chunk)...

例外を発生させる:

*** TypeError: 'str' object is not callable
于 2013-05-10T17:11:03.977 に答える