Install Dependencies

pip install pyaudio 
pip install reverie-sdk
pip install python-dotenv

How Does It Work?

The main steps involved in the script are as follows:

  1. Environment Configuration:
    The script loads essential credentials (REVERIE_APP_ID and REVERIE_API_KEY) from environment variables using the dotenv package. These credentials are required for accessing the Reverie SDK and its services.
Python
import os
import dotenv
from reverie_sdk import ReverieClient

# Load variables from the .env file into the process environment.
# NOTE: "../.env" is a relative path, so the file it points to depends on the
# directory the script is launched from.
dotenv.load_dotenv("../.env")
# os.environ.get returns None when a variable is not set.
REVERIE_APP_ID = os.environ.get("REVERIE_APP_ID")
REVERIE_API_KEY = os.environ.get("REVERIE_API_KEY")
# Client object through which the ASR calls in the following steps are made.
client = ReverieClient(
    api_key=REVERIE_API_KEY,
    app_id=REVERIE_APP_ID,
)
  2. Real-Time Audio Capture:
    The script uses the pyaudio library to stream audio data from the microphone. The audio is captured in real time as 16 kHz, mono audio in 16-bit signed integer format.
Python
import pyaudio
import asyncio

# `stream` receives the captured audio chunks; `pa` is the PyAudio handle used
# to open the microphone.
stream = AudioStream()
pa = pyaudio.PyAudio()

def mic_callback(in_data, frame_count, time_info, status):
    """Forward one captured audio chunk to ``stream``.

    Registered with ``pa.open`` as ``stream_callback``. Only ``in_data`` is
    used; ``frame_count``, ``time_info`` and ``status`` are ignored.

    Returns ``(None, pyaudio.paContinue)`` after a successful hand-off, or
    ``(None, pyaudio.paAbort)`` if adding the chunk raised an exception.
    """
    try:
        # This callback is a plain (non-async) function, so the coroutine is
        # driven to completion with asyncio.run.
        asyncio.run(stream.add_chunk_async(in_data))
    except Exception as e:
        print(f"Error in mic callback: {e}")
        return (None, pyaudio.paAbort)
    return (None, pyaudio.paContinue)

# Open an input stream: 16000 Hz sample rate, 1 channel, paInt16 samples,
# 1024 frames per buffer, with captured data delivered to mic_callback.
pa_stream = pa.open(
    rate=16000,
    channels=1,
    format=pyaudio.paInt16,
    frames_per_buffer=1024,
    input=True,
    stream_callback=mic_callback,
)
pa_stream.start_stream()
  3. Speech-to-Text (STT) Conversion:
    The audio data is continuously streamed and sent to Reverie’s Automatic Speech Recognition (ASR) service for transcription. The system listens for spoken words and converts them into text in the specified source language (in this case, English).
Python
from reverie_sdk.services.asr import ReverieAsrResult, AudioStream

# Hand the audio stream to the ASR service for transcription in `source_lang`.
# `asr_callback` is the function that receives the ASR results.
await client.asr.stt_stream_async(
    src_lang=source_lang,
    bytes_or_stream=stream,
    callback=asr_callback,
    # Presumably mirrors the capture settings (16000 Hz, paInt16) - confirm
    # against the SDK documentation.
    format="16k_int16",
    punctuate="true",
    domain="voice_search",
)
  4. Voice Search Use Case:
    The stt_stream_async method is used with the “voice_search” domain. This allows the system to process and transcribe speech, making it ideal for voice search applications where users speak their search queries and the system transcribes those queries into text for further action.
Python
# Same streaming call as in the previous step; the argument that selects the
# voice-search use case is `domain`.
await client.asr.stt_stream_async(
    src_lang=source_lang,
    bytes_or_stream=stream,
    callback=asr_callback,
    format="16k_int16",
    punctuate="true",
    # Selects the "voice_search" domain described above.
    domain="voice_search",
)
  5. Real-Time Transcription:
    The transcription occurs in real-time, with each segment of speech being converted into text and displayed immediately. Once the system recognizes a complete, final transcription, the result is returned and printed.
Python
async def speech_to_text(source_lang):
    """Wait for the final ASR result and return its display text.

    Every result passed to ``asr_callback`` is printed and collected in
    ``asr_responses``. The coroutine returns as soon as the most recent
    result is marked ``final``.

    Args:
        source_lang: Language code of the speech being transcribed.

    Returns:
        The ``display_text`` of the final ASR result.
    """
    asr_responses: list[ReverieAsrResult] = []

    def asr_callback(resp: ReverieAsrResult):
        print(resp)
        asr_responses.append(resp)

    while True:
        if asr_responses and asr_responses[-1].final:
            final_text = asr_responses[-1].display_text
            print(final_text)
            return final_text

        # Yield to the event loop between checks. A loop with no await would
        # spin at 100% CPU and prevent any other task (or a cancellation or
        # timeout) from ever running.
        await asyncio.sleep(0.05)
  6. Error Handling:
    The script includes error handling for the audio capture, streaming, and transcription process: exceptions are caught and printed together with their traceback, and the audio stream is always stopped and closed, so the microphone is released even when an error occurs.
import traceback

try:
    # [Audio streaming and transcription code]
    ...
except Exception as err:
    # Report the failure together with the full traceback.
    print(f"Error in Speech-to-Text: {err}")
    traceback.print_exc()
finally:
    # Runs on success and on failure: stop and close the input stream, then
    # release the PortAudio resources held by the PyAudio instance.
    pa_stream.stop_stream()
    pa_stream.close()
    pa.terminate()
  7. Output:
    The final transcribed text is printed to the console, which could then be used for various purposes, such as initiating a search query, performing a task, or further processing based on the transcription result.

Sample Code

import asyncio
import os
import traceback
import pyaudio
from reverie_sdk import ReverieClient
from reverie_sdk.services.asr import ReverieAsrResult, AudioStream
import dotenv

# Load credentials from a local .env file into the process environment.
# NOTE: "../.env" is a relative path, so the file it points to depends on the
# directory the script is launched from.
dotenv.load_dotenv("../.env")

REVERIE_APP_ID = os.environ.get("REVERIE_APP_ID")
REVERIE_API_KEY = os.environ.get("REVERIE_API_KEY")

# Fail fast with a clear message instead of passing None credentials to the
# SDK and failing later in a less obvious place.
_missing_vars = [
    name
    for name, value in (
        ("REVERIE_APP_ID", REVERIE_APP_ID),
        ("REVERIE_API_KEY", REVERIE_API_KEY),
    )
    if not value
]
if _missing_vars:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_vars)}"
    )

# Client object through which speech_to_text makes its ASR call.
client = ReverieClient(
    api_key=REVERIE_API_KEY,
    app_id=REVERIE_APP_ID,
)


async def speech_to_text(source_lang: str):
    """Transcribe live microphone audio and return the final recognized text.

    Opens a PyAudio input stream (16000 Hz, 1 channel, ``paInt16`` samples),
    forwards every captured chunk to an ``AudioStream`` and passes that stream
    to ``client.asr.stt_stream_async`` with the ``voice_search`` domain. Each
    ASR result is printed as it arrives.

    Args:
        source_lang: Language code passed to the ASR call as ``src_lang``
            (for example ``"en"``).

    Returns:
        The ``display_text`` of the final ASR result, or ``None`` if an
        exception was raised while streaming (the error and its traceback
        are printed).
    """
    stream = AudioStream()
    pa = pyaudio.PyAudio()

    def mic_callback(in_data, frame_count, time_info, status):
        # PyAudio stream callback: hand the captured bytes to the ASR stream.
        # It is a plain function, so the coroutine is run with asyncio.run.
        try:
            asyncio.run(stream.add_chunk_async(in_data))
        except Exception as e:
            print(f"Error in mic callback: {e}")
            # Stop capturing if a chunk could not be forwarded.
            return (None, pyaudio.paAbort)
        return (None, pyaudio.paContinue)

    pa_stream = pa.open(
        rate=16000,
        channels=1,
        format=pyaudio.paInt16,
        frames_per_buffer=1024,
        input=True,
        stream_callback=mic_callback,
    )
    pa_stream.start_stream()

    print("Listening at STT..")

    asr_responses: list[ReverieAsrResult] = []

    def asr_callback(resp: ReverieAsrResult):
        # Print every result and keep it so the final one can be returned.
        print(resp)
        asr_responses.append(resp)

    poll_interval = 0.05  # seconds between checks for the final result

    try:
        await client.asr.stt_stream_async(
            src_lang=source_lang,
            bytes_or_stream=stream,
            callback=asr_callback,
            format="16k_int16",
            punctuate="true",
            domain="voice_search",
        )

        while True:
            if asr_responses and asr_responses[-1].final:
                final_text = asr_responses[-1].display_text
                print(final_text)
                return final_text

            # Yield to the event loop between checks. A loop with no await
            # would spin at 100% CPU and block every other task, including
            # cancellation of this coroutine.
            await asyncio.sleep(poll_interval)

    except Exception as err:
        print(f"Error in Speech-to-Text: {err}")
        traceback.print_exc()
        return None

    finally:
        # Always release the microphone and the PortAudio resources.
        pa_stream.stop_stream()
        pa_stream.close()
        pa.terminate()


# Run only when executed as a script, so importing this module does not start
# recording from the microphone.
if __name__ == "__main__":
    asyncio.run(speech_to_text("en"))