PDFファイルをテキストに変換するc#機能があります
PDFファイルが5ページ以下の場合は正常に動作します
しかし、PDFファイルに6ページ以上ある場合、このエラーが発生します
System.ArgumentOutOfRangeException: Index was out of range. Must be non-negative and less than the size of the collection.
Parameter name: index
at System.ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument argument, ExceptionResource resource)
at System.Collections.Generic.List`1.get_Item(Int32 index)
at Newtonsoft.Json.Linq.JContainer.GetItem(Int32 index)
at CallSite.Target(Closure , CallSite , Object , Int32 )
これが私のコードです
private string OCRPDF(string pdfFile)
{
client = ImageAnnotatorClient.Create();
//var client = ImageAnnotatorClient.Create();
Byte[] bytes = File.ReadAllBytes(pdfFile);
var content_byte = ByteString.CopyFrom(bytes);
var syncRequest = new AnnotateFileRequest
{
InputConfig = new InputConfig
{
Content = content_byte,
// Supported mime_types are: 'application/pdf' and 'image/tiff'
MimeType = "application/pdf"
}
};
syncRequest.Features.Add(new Feature
{
Type = Feature.Types.Type.DocumentTextDetection
});
List<AnnotateFileRequest> requests =
new List<AnnotateFileRequest>();
requests.Add(syncRequest);
string response = client.BatchAnnotateFiles(requests).ToString();
dynamic pdfJSON = JObject.Parse(response);
int totalPages = Convert.ToInt32(pdfJSON.responses[0].totalPages);
if (totalPages > 5)
{
totalPages = 5;
}
string pdfText = "";
for (int i = 0; i < totalPages; i++)
{
pdfText = pdfText + " " + pdfJSON.responses[0].responses[i].fullTextAnnotation.text;
}
//pdfText.Replace("\n", "\r\n");
return pdfText;
}