AWS SDK for Java version 2
Developer Guide

Working with Amazon Transcribe

The following example shows how bidirectional streaming works using Amazon Transcribe. Bidirectional streaming means that a stream of data is sent to the service while a stream of results is received back, both in real time. The example uses Amazon Transcribe streaming transcription to send an audio stream and receive a stream of transcribed text back in real time.

See Streaming Transcription in the Amazon Transcribe Developer Guide to learn more about this feature.

See Getting Started in the Amazon Transcribe Developer Guide to get started using Amazon Transcribe.

Set up the Microphone

This code uses the javax.sound.sampled package to stream audio from an input device.

Code

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;

/**
 * Helper that obtains an audio capture line from the default audio system.
 */
public class Microphone {

    /**
     * Looks up a {@link TargetDataLine} matching the capture format and opens it.
     *
     * <p>The line is opened but not started; the caller is responsible for calling
     * {@code start()} on it and for closing it when done.
     *
     * @return an opened capture line
     * @throws Exception if no matching line can be obtained or the line cannot be opened
     */
    public static TargetDataLine get() throws Exception {
        // 16 kHz sample rate, 16-bit samples, 1 channel, signed, little-endian.
        AudioFormat captureFormat = new AudioFormat(16000, 16, 1, true, false);

        TargetDataLine line = (TargetDataLine) AudioSystem.getLine(
                new DataLine.Info(TargetDataLine.class, captureFormat));
        line.open(captureFormat);
        return line;
    }
}

See the complete example on GitHub.

Create a Publisher

This code implements a publisher that reads audio data from an input stream and publishes it as the audio stream that is sent to Amazon Transcribe.

Code

import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicLong; import org.reactivestreams.Publisher; import org.reactivestreams.Subscriber; import org.reactivestreams.Subscription; import software.amazon.awssdk.core.SdkBytes; import software.amazon.awssdk.services.transcribestreaming.model.AudioEvent; import software.amazon.awssdk.services.transcribestreaming.model.AudioStream; public class AudioStreamPublisher implements Publisher<AudioStream> { private final InputStream inputStream; public AudioStreamPublisher(InputStream inputStream) { this.inputStream = inputStream; } @Override public void subscribe(Subscriber<? super AudioStream> s) { s.onSubscribe(new SubscriptionImpl(s, inputStream)); } private class SubscriptionImpl implements Subscription { private static final int CHUNK_SIZE_IN_BYTES = 1024 * 1; private ExecutorService executor = Executors.newFixedThreadPool(1); private AtomicLong demand = new AtomicLong(0); private final Subscriber<? super AudioStream> subscriber; private final InputStream inputStream; private SubscriptionImpl(Subscriber<? 
super AudioStream> s, InputStream inputStream) { this.subscriber = s; this.inputStream = inputStream; } @Override public void request(long n) { if (n <= 0) { subscriber.onError(new IllegalArgumentException("Demand must be positive")); } demand.getAndAdd(n); executor.submit(() -> { try { do { ByteBuffer audioBuffer = getNextEvent(); if (audioBuffer.remaining() > 0) { AudioEvent audioEvent = audioEventFromBuffer(audioBuffer); subscriber.onNext(audioEvent); } else { subscriber.onComplete(); break; } } while (demand.decrementAndGet() > 0); } catch (Exception e) { subscriber.onError(e); } }); } @Override public void cancel() { } private ByteBuffer getNextEvent() { ByteBuffer audioBuffer; byte[] audioBytes = new byte[CHUNK_SIZE_IN_BYTES]; int len = 0; try { len = inputStream.read(audioBytes); if (len <= 0) { audioBuffer = ByteBuffer.allocate(0); } else { audioBuffer = ByteBuffer.wrap(audioBytes, 0, len); } } catch (IOException e) { throw new UncheckedIOException(e); } return audioBuffer; } private AudioEvent audioEventFromBuffer(ByteBuffer bb) { return AudioEvent.builder() .audioChunk(SdkBytes.fromByteBuffer(bb)) .build(); } } }

See the complete example on GitHub.

Create the Client and Start the Stream

In the main method, create a request object, start the audio input stream and instantiate the publisher with the audio input.

You must also create a StartStreamTranscriptionResponseHandler to specify how to handle the response from Amazon Transcribe.

Then, use the TranscribeStreamingAsyncClient's startStreamTranscription method to start the bidirectional streaming.

Imports

import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.DataLine; import javax.sound.sampled.TargetDataLine; import javax.sound.sampled.AudioInputStream; import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.services.transcribestreaming.TranscribeStreamingAsyncClient; import software.amazon.awssdk.services.transcribestreaming.model.LanguageCode; import software.amazon.awssdk.services.transcribestreaming.model.MediaEncoding; import software.amazon.awssdk.services.transcribestreaming.model.StartStreamTranscriptionRequest; import software.amazon.awssdk.services.transcribestreaming.model.StartStreamTranscriptionResponseHandler; import software.amazon.awssdk.services.transcribestreaming.model.TranscriptEvent;

Code

/**
 * Streams microphone audio to Amazon Transcribe and prints each transcript alternative
 * received back to standard output.
 *
 * <p>Blocks until the streaming call finishes. The client and the microphone line are
 * closed when the method exits, whether the call completed normally or with an error.
 *
 * @param args unused
 * @throws Exception if the microphone cannot be obtained or opened
 */
public static void main(String[] args) throws Exception {
    StartStreamTranscriptionRequest request = StartStreamTranscriptionRequest.builder()
            .mediaEncoding(MediaEncoding.PCM)
            .languageCode(LanguageCode.EN_US)
            .mediaSampleRateHertz(16_000)
            .build();

    StartStreamTranscriptionResponseHandler response = StartStreamTranscriptionResponseHandler.builder()
            .subscriber(e -> {
                // Skip events that are not transcript events instead of failing on a bad cast.
                if (!(e instanceof TranscriptEvent)) {
                    return;
                }
                TranscriptEvent event = (TranscriptEvent) e;
                event.transcript().results().forEach(
                        r -> r.alternatives().forEach(a -> System.out.println(a.transcript())));
            })
            .build();

    // try-with-resources closes the microphone line first, then the client.
    try (TranscribeStreamingAsyncClient client = TranscribeStreamingAsyncClient.builder()
                 .credentialsProvider(ProfileCredentialsProvider.create())
                 .build();
         TargetDataLine mic = Microphone.get()) {
        mic.start();
        AudioStreamPublisher publisher = new AudioStreamPublisher(new AudioInputStream(mic));
        client.startStreamTranscription(request, publisher, response).join();
    }
}

See the complete example on GitHub.

More Info