これはAWS Transcribe S3 .wav file to textへの後続の質問です。ストリームを使用して、.wav ファイルの内容を読み取り、AWS に送信します。
正しい書き起こしを返す代わりに、「うん」のようなナンセンスな言葉が返ってきます。ステートメント。AWS はバイト ストリームを正しく解釈できないようですが、何が問題なのかわかりません。ファイルを何らかの方法でエンコードする必要があるかどうか、つまり、ファイルから生の .wav バイトを直接送信できないかどうか疑問に思っています。それとも、これが .wav 形式であることをサービスに伝える必要があるのでしょうか?
ここで何が問題なのですか?入力ファイルは有効な .wav 音声ファイルで、聞くとはっきりと聞こえます。
ここに私のJavaコードがあります:
package com.amazonaws.transcribe;
import org.reactivestreams.Publisher;
import org.reactivestreams.Subscriber;
import org.reactivestreams.Subscription;
import software.amazon.awssdk.core.SdkBytes;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.transcribestreaming.TranscribeStreamingAsyncClient;
import software.amazon.awssdk.services.transcribestreaming.model.*;
import javax.sound.sampled.*;
import java.io.*;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
public class TranscribeFileFromStream {
private static final Region REGION = Region.US_EAST_1;
private static TranscribeStreamingAsyncClient client;
public static void main(String args[]) throws URISyntaxException, ExecutionException, InterruptedException, LineUnavailableException {
System.out.println(System.getProperty("java.version"));
client = TranscribeStreamingAsyncClient.builder()
.region(REGION)
.build();
try {
CompletableFuture<Void> result = client.startStreamTranscription(getRequest(16000),
new AudioStreamPublisher(getStreamFromFile()),
getResponseHandler());
result.get();
} finally {
if (client != null) {
client.close();
}
}
}
private static InputStream getStreamFromFile() {
try {
File inputFile = new File("~/work/transcribe/src/main/resources/story/media/Story3.m4a.wav");
InputStream audioStream = new FileInputStream(inputFile);
return audioStream;
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
private static StartStreamTranscriptionRequest getRequest(Integer mediaSampleRateHertz) {
return StartStreamTranscriptionRequest.builder()
.languageCode(LanguageCode.EN_US)
.mediaEncoding(MediaEncoding.PCM)
.mediaSampleRateHertz(mediaSampleRateHertz)
.build();
}
private static StartStreamTranscriptionResponseHandler getResponseHandler() {
return StartStreamTranscriptionResponseHandler.builder()
.onResponse(r -> {
System.out.println("Received Initial response");
})
.onError(e -> {
System.out.println(e.getMessage());
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
System.out.println("Error Occurred: " + sw.toString());
})
.onComplete(() -> {
System.out.println("=== All records stream successfully ===");
})
.subscriber(event -> {
List<Result> results = ((TranscriptEvent) event).transcript().results();
if (results.size() > 0) {
if (!results.get(0).alternatives().get(0).transcript().isEmpty()) {
System.out.println(results.get(0).alternatives().get(0).transcript());
} else {
System.out.println("Empty result");
}
} else {
System.out.println("No results");
}
})
.build();
}
private static class AudioStreamPublisher implements Publisher<AudioStream> {
private final InputStream inputStream;
private static Subscription currentSubscription;
private AudioStreamPublisher(InputStream inputStream) {
this.inputStream = inputStream;
}
@Override
public void subscribe(Subscriber<? super AudioStream> s) {
if (this.currentSubscription == null) {
this.currentSubscription = new SubscriptionImpl(s, inputStream);
} else {
this.currentSubscription.cancel();
this.currentSubscription = new SubscriptionImpl(s, inputStream);
}
s.onSubscribe(currentSubscription);
}
}
public static class SubscriptionImpl implements Subscription {
private static final int CHUNK_SIZE_IN_BYTES = 1024 * 1;
private final Subscriber<? super AudioStream> subscriber;
private final InputStream inputStream;
private ExecutorService executor = Executors.newFixedThreadPool(1);
private AtomicLong demand = new AtomicLong(0);
SubscriptionImpl(Subscriber<? super AudioStream> s, InputStream inputStream) {
this.subscriber = s;
this.inputStream = inputStream;
}
@Override
public void request(long n) {
if (n <= 0) {
subscriber.onError(new IllegalArgumentException("Demand must be positive"));
}
demand.getAndAdd(n);
executor.submit(() -> {
try {
do {
ByteBuffer audioBuffer = getNextEvent();
if (audioBuffer.remaining() > 0) {
AudioEvent audioEvent = audioEventFromBuffer(audioBuffer);
subscriber.onNext(audioEvent);
} else {
subscriber.onComplete();
break;
}
} while (demand.decrementAndGet() > 0);
} catch (Exception e) {
subscriber.onError(e);
}
});
}
@Override
public void cancel() {
executor.shutdown();
}
private ByteBuffer getNextEvent() {
ByteBuffer audioBuffer = null;
byte[] audioBytes = new byte[CHUNK_SIZE_IN_BYTES];
int len = 0;
try {
len = inputStream.read(audioBytes);
if (len <= 0) {
audioBuffer = ByteBuffer.allocate(0);
} else {
audioBuffer = ByteBuffer.wrap(audioBytes, 0, len);
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return audioBuffer;
}
private AudioEvent audioEventFromBuffer(ByteBuffer bb) {
return AudioEvent.builder()
.audioChunk(SdkBytes.fromByteBuffer(bb))
.build();
}
}
}
これが私のプログラム出力です:
Received Initial response
No results
No results
Yeah.
No results
Yeah.
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
Yeah.
No results
No results
Oh,
No results
Oh,
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
Oh,
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results
No results