Azure AI services - Recognize and synthesize speech:
1. Create a 'Speech service' resource in Azure.
C# Code:
using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Media;
// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
// dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0
// dotnet add package System.Windows.Extensions --version 4.6.0
namespace speaking_clock
{
    /// <summary>
    /// Console "speaking clock" lab: configures the Azure AI Speech service and
    /// answers the command "what time is it?" with synthesized speech.
    /// </summary>
    class Program
    {
        // Shared Speech service configuration. Assigned once in Main and then
        // read by TranscribeCommand and TellTime.
        private static SpeechConfig speechConfig;

        /// <summary>
        /// Entry point. Reads the Speech key/region from the environment, builds the
        /// shared <see cref="SpeechConfig"/>, and speaks the time when the command
        /// matches "what time is it?". Any exception is reported by message only.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            try
            {
                // Secrets must not live in source control: the subscription key is
                // taken from the SPEECH_KEY environment variable instead of a literal.
                string aiSvcKey = Environment.GetEnvironmentVariable("SPEECH_KEY");
                if (string.IsNullOrWhiteSpace(aiSvcKey))
                {
                    Console.WriteLine("Set the SPEECH_KEY environment variable to your Speech resource key.");
                    return;
                }

                // The region falls back to the value the lab originally hard-coded.
                string aiSvcRegion = Environment.GetEnvironmentVariable("SPEECH_REGION");
                if (string.IsNullOrWhiteSpace(aiSvcRegion))
                {
                    aiSvcRegion = "eastus";
                }

                // Configure speech service
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);
                Console.WriteLine("Ready to use speech service in " + speechConfig.Region);

                // Configure voice
                speechConfig.SpeechSynthesisVoiceName = "en-US-AriaNeural";

                // Get spoken input. The lab uses a fixed command; replace the literal
                // with `await TranscribeCommand()` to listen on the microphone instead.
                string command = "what time is it?";

                // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive
                // and can fail to match under cultures with special casing rules.
                if (string.Equals(command, "what time is it?", StringComparison.OrdinalIgnoreCase))
                {
                    await TellTime();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Listens once on the default microphone and returns the recognized text.
        /// </summary>
        /// <returns>
        /// The recognized phrase, or an empty string when nothing was recognized
        /// (the result reason and any cancellation details are printed in that case).
        /// </returns>
        static async Task<string> TranscribeCommand()
        {
            // Configure speech recognition.
            // To recognize from a file instead, build the audio config with
            // AudioConfig.FromWavFileInput("time.wav").
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using SpeechRecognizer speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            Console.WriteLine("Speak now...");

            // Process speech input
            SpeechRecognitionResult speech = await speechRecognizer.RecognizeOnceAsync();
            if (speech.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine(speech.Text);
                return speech.Text;
            }

            // Not recognized: report why, including service error details on cancel.
            Console.WriteLine(speech.Reason);
            if (speech.Reason == ResultReason.Canceled)
            {
                var cancellation = CancellationDetails.FromResult(speech);
                Console.WriteLine(cancellation.Reason);
                Console.WriteLine(cancellation.ErrorDetails);
            }

            return "";
        }

        /// <summary>
        /// Speaks the current local time (H:mm) followed by a closing phrase using
        /// SSML, then prints the spoken time text to the console.
        /// </summary>
        static async Task TellTime()
        {
            var now = DateTime.Now;
            string responseText = $"The time is {now.Hour}:{now.Minute:D2}";

            // Configure speech synthesis
            speechConfig.SpeechSynthesisVoiceName = "en-GB-RyanNeural";
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);

            // Synthesize spoken output. The SSML names its own voice
            // ('en-GB-LibbyNeural'); NOTE(review): presumably that takes precedence
            // over SpeechSynthesisVoiceName for this request - confirm.
            // The literal is kept flush-left so its content is unchanged.
            string responseSsml = $@"
<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>
<voice name='en-GB-LibbyNeural'>
{responseText}
<break strength='weak'/>
Time to end this lab!
</voice>
</speak>";

            // The synthesis result is disposable; release it when done.
            using SpeechSynthesisResult speak = await speechSynthesizer.SpeakSsmlAsync(responseSsml);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
                if (speak.Reason == ResultReason.Canceled)
                {
                    // Surface the service error (bad key, wrong region, ...) instead
                    // of only the bare "Canceled" reason.
                    var cancellation = SpeechSynthesisCancellationDetails.FromResult(speak);
                    Console.WriteLine(cancellation.Reason);
                    Console.WriteLine(cancellation.ErrorDetails);
                }
            }

            // Print the response
            Console.WriteLine(responseText);
        }
    }
}
Output:
Python Code:
from dotenv import load_dotenv
from datetime import datetime
from playsound import playsound
import os
# Import namespaces
import azure.cognitiveservices.speech as speech_sdk
# pip install azure-cognitiveservices-speech==1.30.0
# pip install python-dotenv
# pip install playsound==1.2.2
def main():
    """Configure the Speech service and answer the "what time is it?" command.

    Reads the subscription key from SPEECH_KEY and the region from
    SPEECH_REGION (environment or .env file), stores the resulting
    SpeechConfig in the module-level ``speech_config`` used by the other
    functions, and calls TellTime() when the command matches.
    Any exception is printed rather than propagated.
    """
    global speech_config
    try:
        # Get Configuration Settings
        load_dotenv()

        # Secrets must not live in source control: the key comes from the
        # environment (or the .env file loaded above), never from a literal.
        ai_key = os.getenv('SPEECH_KEY')
        if not ai_key:
            print('Set SPEECH_KEY (environment variable or .env file) to your Speech resource key.')
            return

        # The region falls back to the value the lab originally hard-coded.
        ai_region = os.getenv('SPEECH_REGION') or 'eastus'

        # Configure speech service
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)
        print('Ready to use speech service in:', speech_config.region)

        # Get spoken input. The lab uses a fixed command; replace the literal
        # with TranscribeCommand() to recognize speech instead.
        command = 'what time is it?'
        if command.lower() == 'what time is it?':
            TellTime()
    except Exception as ex:
        print(ex)
def TranscribeCommand():
    """Play time.wav from the current directory and return its recognized text.

    Returns the recognized phrase, or an empty string when recognition did
    not succeed (the result reason and any cancellation details are printed).
    Relies on the module-level ``speech_config`` set by main().
    """
    command = ''

    # Configure speech recognition.
    # To listen on the microphone instead of a file, use:
    #   audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    #   print('Speak now...')
    # os.path.join keeps the path valid on every platform, not just Windows.
    audioFile = os.path.join(os.getcwd(), 'time.wav')
    playsound(audioFile)
    audio_config = speech_sdk.AudioConfig(filename=audioFile)
    speech_recognizer = speech_sdk.SpeechRecognizer(speech_config, audio_config)

    # Process speech input
    speech = speech_recognizer.recognize_once_async().get()
    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:
        command = speech.text
        print(command)
    else:
        print(speech.reason)
        if speech.reason == speech_sdk.ResultReason.Canceled:
            cancellation = speech.cancellation_details
            print(cancellation.reason)
            print(cancellation.error_details)

    # Return the command
    return command
def TellTime():
    """Speak the current local time plus a closing phrase, then print the time text.

    Uses the module-level ``speech_config`` set by main(); if synthesis does
    not complete, the result reason is printed.
    """
    current = datetime.now()
    response_text = f'The time is {current.hour}:{current.minute:02d}'

    # Set up the synthesizer on the shared configuration.
    speech_config.speech_synthesis_voice_name = "en-GB-RyanNeural"
    synthesizer = speech_sdk.SpeechSynthesizer(speech_config=speech_config)

    # Build the SSML from adjacent literals; the single leading space and the
    # space after each fragment reproduce the original text exactly.
    ssml_template = (
        " "
        "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'> "
        "<voice name='en-GB-LibbyNeural'> "
        "{} "
        "<break strength='weak'/> "
        "Time to end this lab! "
        "</voice> "
        "</speak>"
    )
    ssml = ssml_template.format(response_text)

    # Synthesize spoken output and report anything other than success.
    outcome = synthesizer.speak_ssml_async(ssml).get()
    if outcome.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(outcome.reason)

    # Print the response
    print(response_text)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Output:
My curiosity led me to explore speednewscentral, where I discovered engaging articles presented with clarity and professionalism. The website combines simplicity with usefulness, creating a smooth browsing experience from beginning to end.
ReplyDeleteSomewhere between routine internet searches, TechChevy introduced an experience built around clarity and accessibility. The pages are arranged intelligently, allowing visitors to focus on informative content without unnecessary distractions.
ReplyDeleteWithout demanding unnecessary attention from visitors, dailynewsreleases.com quietly impresses through consistency, accessibility, and useful information. Its thoughtful arrangement allows readers to focus entirely on the quality of the content.
ReplyDelete