0

これは私がログから得ているエラーです:

[ERROR] KeyError: 'Text'
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 51, in lambda_handler
    pdfText += item["Text"] + '\n'

フォームからデータを抽出し、.csv ファイルとして S3 に保存するために、テキストを介してフォーム分析を実行しようとしています。私のコードは以下の通りです:

import boto3
import os

def getJobResults(jobId):

    pages = []

    textract = boto3.client('textract')
    response = textract.get_document_analysis(JobId=jobId)
    
    pages.append(response)

    nextToken = None
    if('NextToken' in response):
        nextToken = response['NextToken']

    while(nextToken):

        response = textract.get_document_analysis(JobId=jobId, NextToken=nextToken)

        pages.append(response)
        nextToken = None
        if('NextToken' in response):
            nextToken = response['NextToken']

    return pages

def lambda_handler(event, context):
    notificationMessage = json.loads(json.dumps(event))['Records'][0]['Sns']['Message']
    
    pdfTextExtractionStatus = json.loads(notificationMessage)['Status']
    pdfTextExtractionJobTag = json.loads(notificationMessage)['JobTag']
    pdfTextExtractionJobId = json.loads(notificationMessage)['JobId']
    pdfTextExtractionDocumentLocation = json.loads(notificationMessage)['DocumentLocation']
    
    pdfTextExtractionS3ObjectName = json.loads(json.dumps(pdfTextExtractionDocumentLocation))['S3ObjectName']
    pdfTextExtractionS3Bucket = json.loads(json.dumps(pdfTextExtractionDocumentLocation))['S3Bucket']
    
    print(pdfTextExtractionJobTag + ' : ' + pdfTextExtractionStatus)
    
    pdfText = ''
    
    if(pdfTextExtractionStatus == 'SUCCEEDED'):
        response = getJobResults(pdfTextExtractionJobId)
        
        for resultPage in response:
            for item in resultPage["Blocks"]:
                if item["BlockType"] == "KEY_VALUE_SET" :
                    EntityTypes : ['KEY'|'VALUE']
                    pdfText += item["Text"] + '\n'
                    
                    
        s3 = boto3.client('s3')
        
        outputTextFileName = os.path.splitext(pdfTextExtractionS3ObjectName)[0] + '.csv'
        s3.put_object(Body=pdfText, Bucket=pdfTextExtractionS3Bucket, Key=outputTextFileName)

私がフォローしているドキュメントは次のとおりです

どんなアドバイスでも大歓迎です!ありがとうございました

4

0 に答える 0