
Wednesday, February 5, 2025

Azure AI services - Speech service


Source: https://github.com/MicrosoftLearning/mslearn-ai-language

1. Create a 'Speech service' resource in the Azure portal, then copy its key and region (or use the Azure CLI, as sketched below).
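
If you prefer the command line, a minimal Azure CLI sketch of the same step (the resource and resource group names are placeholders; F0 is the free tier):

az cognitiveservices account create --name my-speech-resource --resource-group my-rg --kind SpeechServices --sku F0 --location eastus --yes
az cognitiveservices account keys list --name my-speech-resource --resource-group my-rg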

C# Code:

dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0
dotnet add package System.Windows.Extensions --version 4.6.0 

using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Collections.Generic;
using System.Text;

// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Translation;

using System.Media;

namespace speech_translation
{
    class Program
    {
        private static SpeechConfig speechConfig;
        private static SpeechTranslationConfig translationConfig;

        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                IConfigurationRoot configuration = builder.Build();
                string aiSvcKey = configuration["SpeechKey"];
                string aiSvcRegion = configuration["SpeechRegion"];

                // Set console encoding to unicode
                Console.InputEncoding = Encoding.Unicode;
                Console.OutputEncoding = Encoding.Unicode;

                // Configure translation
                translationConfig = SpeechTranslationConfig.FromSubscription(aiSvcKey, aiSvcRegion);
                translationConfig.SpeechRecognitionLanguage = "en-US";
                translationConfig.AddTargetLanguage("fr");
                translationConfig.AddTargetLanguage("es");
                translationConfig.AddTargetLanguage("hi");
                Console.WriteLine("Ready to translate from " +                 translationConfig.SpeechRecognitionLanguage);

                // Configure speech
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);

                string targetLanguage = "";
                while (targetLanguage != "quit")
                {
                    Console.WriteLine("\nEnter a target language\n fr = French\n es =                     Spanish\n hi = Hindi\n Enter anything else to stop\n");
                    targetLanguage = Console.ReadLine().ToLower();
                    if (translationConfig.TargetLanguages.Contains(targetLanguage))
                    {
                        await Translate(targetLanguage);
                    }
                    else
                    {
                        targetLanguage = "quit";
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        static async Task Translate(string targetLanguage)
        {
            string translation = "";

            // Translate speech
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            Console.WriteLine("Speak now...");
            TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            Console.WriteLine($"Translating '{result.Text}'");
            translation = result.Translations[targetLanguage];
            Console.OutputEncoding = Encoding.UTF8;
            Console.WriteLine(translation);

            // Alternative: translate speech from a WAV file instead of the microphone
            // string audioFile = "station.wav";
            // SoundPlayer wavPlayer = new SoundPlayer(audioFile);
            // wavPlayer.Play();
            // using AudioConfig audioConfig = AudioConfig.FromWavFileInput(audioFile);
            // using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            // Console.WriteLine("Getting speech from file...");
            // TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            // Console.WriteLine($"Translating '{result.Text}'");
            // translation = result.Translations[targetLanguage];
            // Console.OutputEncoding = Encoding.UTF8;
            // Console.WriteLine(translation);

            // Synthesize translation
            var voices = new Dictionary<string, string>
            {
                ["fr"] = "fr-FR-HenriNeural",
                ["es"] = "es-ES-ElviraNeural",
                ["hi"] = "hi-IN-MadhurNeural"
            };
            speechConfig.SpeechSynthesisVoiceName = voices[targetLanguage];
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);
            SpeechSynthesisResult speak = await speechSynthesizer.SpeakTextAsync(translation);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
            }
        }
    }
}
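
The configuration reads above expect an appsettings.json file next to the project; a minimal example (both values are placeholders for your own key and region):

{
    "SpeechKey": "<your-speech-key>",
    "SpeechRegion": "eastus"
}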



Output: (console output screenshot omitted)

Python Code:

pip install azure-cognitiveservices-speech==1.30.0
pip install playsound==1.3.0
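
The Python version loads its settings from a .env file via load_dotenv(); a minimal example (both values are placeholders):

SPEECH_KEY=<your-speech-key>
SPEECH_REGION=eastus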

from dotenv import load_dotenv
from datetime import datetime
import os

# Import namespaces
import azure.cognitiveservices.speech as speech_sdk


def main():
    try:
        global speech_config
        global translation_config

        # Get Configuration Settings
        load_dotenv()
        ai_key = os.getenv('SPEECH_KEY')
        ai_region = os.getenv('SPEECH_REGION')

        # Configure translation
        translation_config = speech_sdk.translation.SpeechTranslationConfig(subscription=ai_key, region=ai_region)
        translation_config.speech_recognition_language = 'en-US'
        translation_config.add_target_language('fr')
        translation_config.add_target_language('es')
        translation_config.add_target_language('hi')
        print('Ready to translate from', translation_config.speech_recognition_language)


        # Configure speech
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)


        # Get user input
        targetLanguage = ''
        while targetLanguage != 'quit':
            targetLanguage = input('\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n').lower()
            if targetLanguage in translation_config.target_languages:
                Translate(targetLanguage)
            else:
                targetLanguage = 'quit'
               

    except Exception as ex:
        print(ex)

def Translate(targetLanguage):
    translation = ''

    # Translate speech
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(translation_config=translation_config, audio_config=audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()
    if result.reason == speech_sdk.ResultReason.TranslatedSpeech:
        print('Translating "{}"'.format(result.text))
        translation = result.translations[targetLanguage]
        print(translation)
    else:
        print("No speech could be recognized or translation failed.")
        return


    # Synthesize translation
    voices = {
        "fr": "fr-FR-HenriNeural",
        "es": "es-ES-ElviraNeural",
        "hi": "hi-IN-MadhurNeural"
    }
 
    # Set the speech synthesis voice name based on the target language
    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
 
    # Initialize the speech synthesizer
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
 
    # Synthesize the translated text to speech
    speak = speech_synthesizer.speak_text_async(translation).get()
 
    # Check if the synthesis was successful
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)
    else:
        print("Speech synthesis completed successfully.")



if __name__ == "__main__":
    main()
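
The C# listing above includes a commented-out variant that translates from a WAV file instead of the microphone. A minimal Python sketch of the same idea, assuming a local station.wav (as in the C# comments) and the playsound package installed earlier:

from playsound import playsound

def TranslateFromFile(targetLanguage):
    audio_file = 'station.wav'
    playsound(audio_file)  # play the source audio so you can hear what is being translated
    audio_config = speech_sdk.AudioConfig(filename=audio_file)
    translator = speech_sdk.translation.TranslationRecognizer(translation_config, audio_config=audio_config)
    print("Getting speech from file...")
    result = translator.recognize_once_async().get()
    print('Translating "{}"'.format(result.text))
    print(result.translations[targetLanguage])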


Output: (console output screenshot omitted)

Script (Language service - language detection via REST):
curl -X POST "https://sreemultiserviceaccount1.cognitiveservices.azure.com/language/:analyze-text?api-version=2023-04-01" -H "Content-Type: application/json" -H "Ocp-Apim-Subscription-Key: <your-key>" --data-ascii "{'analysisInput':{'documents':[{'id':1,'text':'hola'}]}, 'kind': 'LanguageDetection'}"

Output:
{"kind":"LanguageDetectionResults","results":{"documents":[{"id":"1","warnings":[],"detectedLanguage":{"name":"Spanish","iso6391Name":"es","confidenceScore":1.0}}],"errors":[],"modelVersion":"2024-04-01"}}
