AI/ML, AWS, Azure, DevOps, GCP, M365, Microsoft Power Platform, RPA, SharePoint,: Azure AI services

Wednesday, February 5, 2025

Azure AI services - Speech service

Azure AI services - Speech service

Source: https://github.com/MicrosoftLearning/mslearn-ai-language

1. Create 'Speech service' in Azure - copy key and region

C# Code:

dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0

dotnet add package System.Windows.Extensions --version 4.6.0

using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Collections.Generic;
using System.Text;

// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Translation;

using System.Media;

namespace speech_translation
{
    /// <summary>
    /// Console sample that recognizes en-US speech from the default microphone,
    /// translates it into a target language chosen by the user (fr, es or hi),
    /// and speaks the translated text with a matching neural voice.
    /// </summary>
    class Program
    {
        // Voice name used for speech synthesis, keyed by target language code.
        private static readonly Dictionary<string, string> Voices = new Dictionary<string, string>
        {
            ["fr"] = "fr-FR-HenriNeural",
            ["es"] = "es-ES-ElviraNeural",
            ["hi"] = "hi-IN-MadhurNeural"
        };

        private static SpeechConfig speechConfig;
        private static SpeechTranslationConfig translationConfig;

        /// <summary>
        /// Loads the Speech key and region from appsettings.json, configures translation
        /// and synthesis, then prompts for target languages until the user enters
        /// anything that is not a configured target language.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings; the key must not live in source code.
                IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                IConfigurationRoot configuration = builder.Build();
                string aiSvcKey = configuration["SpeechKey"];
                string aiSvcRegion = configuration["SpeechRegion"];
                if (string.IsNullOrWhiteSpace(aiSvcKey) || string.IsNullOrWhiteSpace(aiSvcRegion))
                {
                    Console.WriteLine("SpeechKey and SpeechRegion must be set in appsettings.json.");
                    return;
                }

                // Set console encoding to unicode
                Console.InputEncoding = Encoding.Unicode;
                Console.OutputEncoding = Encoding.Unicode;

                // Configure translation
                translationConfig = SpeechTranslationConfig.FromSubscription(aiSvcKey, aiSvcRegion);
                translationConfig.SpeechRecognitionLanguage = "en-US";
                translationConfig.AddTargetLanguage("fr");
                translationConfig.AddTargetLanguage("es");
                translationConfig.AddTargetLanguage("hi");
                Console.WriteLine("Ready to translate from " + translationConfig.SpeechRecognitionLanguage);

                // Configure speech
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);

                while (true)
                {
                    Console.WriteLine("\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n");

                    // ReadLine returns null at end of input; treat that as a request to stop.
                    // Invariant lower-casing keeps language codes stable under any culture.
                    string targetLanguage = (Console.ReadLine() ?? string.Empty).Trim().ToLowerInvariant();
                    if (!translationConfig.TargetLanguages.Contains(targetLanguage))
                    {
                        break;
                    }

                    await Translate(targetLanguage);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Recognizes one utterance from the default microphone, prints its translation
        /// into <paramref name="targetLanguage"/>, and speaks the translation aloud.
        /// </summary>
        /// <param name="targetLanguage">
        /// Language code of the translation to output; expected to be one of the
        /// target languages added to the translation configuration in <c>Main</c>.
        /// </param>
        static async Task Translate(string targetLanguage)
        {
            // Translate speech
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            Console.WriteLine("Speak now...");
            TranslationRecognitionResult result = await translator.RecognizeOnceAsync();

            // Without a successful translation the Translations lookup would throw,
            // so report the reason and skip synthesis instead.
            if (result.Reason != ResultReason.TranslatedSpeech
                || !result.Translations.TryGetValue(targetLanguage, out string translation))
            {
                Console.WriteLine($"No translation available ({result.Reason}).");
                return;
            }

            Console.WriteLine($"Translating '{result.Text}'");
            Console.OutputEncoding = Encoding.UTF8;
            Console.WriteLine(translation);

            // Alternative input: translate speech from a WAV file instead of the microphone.
            // string audioFile = "station.wav";
            // SoundPlayer wavPlayer = new SoundPlayer(audioFile);
            // wavPlayer.Play();
            // using AudioConfig audioConfig = AudioConfig.FromWavFileInput(audioFile);
            // using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            // Console.WriteLine("Getting speech from file...");
            // TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            // Console.WriteLine($"Translating '{result.Text}'");
            // translation = result.Translations[targetLanguage];
            // Console.OutputEncoding = Encoding.UTF8;
            // Console.WriteLine(translation);

            // Synthesize translation
            speechConfig.SpeechSynthesisVoiceName = Voices[targetLanguage];
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);
            SpeechSynthesisResult speak = await speechSynthesizer.SpeakTextAsync(translation);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
            }
        }
    }
}

Output:

Python Code:

pip install azure-cognitiveservices-speech==1.30.0

pip install playsound==1.3.0

pip install python-dotenv

from dotenv import load_dotenv
from datetime import datetime
import os

# Import namespaces
import azure.cognitiveservices.speech as speech_sdk


def main():
    """Configure the Speech service and run the interactive translation loop.

    Reads SPEECH_KEY and SPEECH_REGION from the environment (after loading a
    .env file), stores the translation and synthesis configurations in the
    module-level globals ``translation_config`` and ``speech_config``, then
    repeatedly prompts for a target language and calls ``Translate`` until the
    user enters something that is not a configured target language.

    Any exception raised along the way is printed rather than propagated.
    """
    try:
        global speech_config
        global translation_config

        # Get Configuration Settings; the key is read from the environment
        # so it is never committed with the source.
        load_dotenv()
        ai_key = os.getenv('SPEECH_KEY')
        ai_region = os.getenv('SPEECH_REGION')
        if not ai_key or not ai_region:
            raise ValueError('SPEECH_KEY and SPEECH_REGION must be set in the environment or .env file')

        # Configure translation
        translation_config = speech_sdk.translation.SpeechTranslationConfig(
            subscription=ai_key, region=ai_region)
        translation_config.speech_recognition_language = 'en-US'
        translation_config.add_target_language('fr')
        translation_config.add_target_language('es')
        translation_config.add_target_language('hi')
        print('Ready to translate from', translation_config.speech_recognition_language)

        # Configure speech
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)

        # Get user input
        prompt = '\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n'
        targetLanguage = ''
        while targetLanguage != 'quit':
            targetLanguage = input(prompt).lower()
            if targetLanguage in translation_config.target_languages:
                Translate(targetLanguage)
            else:
                targetLanguage = 'quit'

    except Exception as ex:
        print(ex)

def Translate(targetLanguage):
    """Translate one spoken utterance and speak the result aloud.

    Listens once on the default microphone, prints every translation returned
    for the recognized speech, then synthesizes the translation for
    ``targetLanguage`` using the matching voice.

    Args:
        targetLanguage: Language code of the translation to speak; expected to
            be one of the target languages added to ``translation_config``.

    Relies on the module-level ``translation_config`` and ``speech_config``
    globals set up by ``main``.
    """
    # Translate speech
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(
        translation_config=translation_config, audio_config=audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()

    # Stop early on failure: there is nothing to print or synthesize.
    if result.reason != speech_sdk.ResultReason.TranslatedSpeech:
        print("No speech could be recognized or translation failed.")
        return

    print('Translating "{}"'.format(result.text))
    for language, text in result.translations.items():
        print('Translation in {}: {}'.format(language, text))

    # Speak the translation for the language the caller asked for.
    translation = result.translations.get(targetLanguage)
    if translation is None:
        print("No speech could be recognized or translation failed.")
        return

    # Synthesize translation
    voices = {
        "fr": "fr-FR-HenriNeural",
        "es": "es-ES-ElviraNeural",
        "hi": "hi-IN-MadhurNeural"
    }

    # Set the speech synthesis voice name based on the target language
    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)

    # Initialize the speech synthesizer
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)

    # Synthesize the translated text to speech
    speak = speech_synthesizer.speak_text_async(translation).get()

    # Check if the synthesis was successful
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)
    else:
        print("Speech synthesis completed successfully.")



# Run the interactive translator only when this file is executed as a script.
if __name__ == "__main__":
    main()

Output:

Python:

from dotenv import load_dotenv
from datetime import datetime
import os

# Import namespaces
import azure.cognitiveservices.speech as speech_sdk

def main():
    """Configure the Speech service and run the interactive translation loop.

    Reads SPEECH_KEY and SPEECH_REGION from the environment (after loading a
    .env file), stores the translation and synthesis configurations in the
    module-level globals ``translation_config`` and ``speech_config``, then
    prompts for target languages and calls ``Translate`` until the user enters
    something that is not a configured target language.

    Any exception raised along the way is printed rather than propagated.
    """
    try:
        global speech_config
        global translation_config

        # Get Configuration Settings; keep the key out of the source file.
        load_dotenv()
        ai_key = os.getenv('SPEECH_KEY')
        ai_region = os.getenv('SPEECH_REGION')
        if not ai_key or not ai_region:
            raise ValueError('SPEECH_KEY and SPEECH_REGION must be set in the environment or .env file')

        # Configure translation. The constructor must actually be called here:
        # assigning the bare class would leave no configuration instance.
        translation_config = speech_sdk.translation.SpeechTranslationConfig(
            subscription=ai_key, region=ai_region)
        translation_config.speech_recognition_language = 'en-US'
        translation_config.add_target_language('fr')
        translation_config.add_target_language('es')
        translation_config.add_target_language('hi')
        print('Ready to translate from',
              translation_config.speech_recognition_language)

        # Configure speech
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)

        # Get user input
        prompt = '\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n'
        targetLanguage = ''
        while targetLanguage != 'quit':
            targetLanguage = input(prompt).lower()
            if targetLanguage in translation_config.target_languages:
                Translate(targetLanguage)
            else:
                targetLanguage = 'quit'

    except Exception as ex:
        print(ex)

def Translate(targetLanguage):
    """Translate one spoken utterance into ``targetLanguage`` and speak it.

    Listens once on the default microphone, prints the recognized text and its
    translation, then synthesizes the translation with the matching voice.

    Args:
        targetLanguage: Language code of the translation to output; expected
            to be one of the target languages added to ``translation_config``.

    Relies on the module-level ``translation_config`` and ``speech_config``
    globals set up by ``main``.
    """
    # Translate speech
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(
        translation_config, audio_config=audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()

    # Guard the lookup below: it would raise KeyError when nothing was translated.
    if (result.reason != speech_sdk.ResultReason.TranslatedSpeech
            or targetLanguage not in result.translations):
        print('No translation available: {}'.format(result.reason))
        return

    print('Translating "{}"'.format(result.text))
    translation = result.translations[targetLanguage]
    print(translation)

    # Synthesize translation
    voices = {
        "fr": "fr-FR-HenriNeural",
        "es": "es-ES-ElviraNeural",
        "hi": "hi-IN-MadhurNeural"
    }

    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
    speak = speech_synthesizer.speak_text_async(translation).get()
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)

# Run the interactive translator only when this file is executed as a script.
if __name__ == "__main__":
    main()

Script (language detection via the analyze-text REST endpoint):
curl -X POST "https://sreemultiserviceaccount1.cognitiveservices.azure.com/language/:analyze-text?api-version=2023-04-01" -H "Content-Type: application/json" -H "Ocp-Apim-Subscription-Key: 2D9XtWQ0YOGFMV1" --data-ascii "{'analysisInput':{'documents':[{'id':1,'text':'hola'}]}, 'kind': 'LanguageDetection'}"

Output:

{"kind":"LanguageDetectionResults",

"results":{"documents":[{"id":"1","warnings":[],

"detectedLanguage":{"name":"Spanish","iso6391Name":"es","confidenceScore":1.0}}],"errors":[],"modelVersion":"2024-04-01"}}

6 comments:

AnonymousOctober 2, 2025 at 8:45 PM
79CAB63987
instagram takipçi alma güvenilir
small swivel accent chair
ReplyDelete
Replies
xenaJune 3, 2026 at 11:07 AM
Celebrating the outstanding reliability and speed winsetupfromusb stands out as a powerful tool for creating bootable USB drives enabling smooth installation of various operating systems while maintaining stability efficiency and user friendly workflow that simplifies complex setup processes for everyone
ReplyDelete
Replies
xenaJune 3, 2026 at 1:15 PM
Cheerful simplicity in benchmarking crystaldiskmark offers a smooth experience for testing storage device performance. Interface feels minimal, results remain precise, and users can quickly understand system speed differences clearly.
ReplyDelete
Replies
NikolayJune 3, 2026 at 1:18 PM
High performance driver tool enhancing controller dshidmini enables accurate input mapping for gamers while maintaining stability responsiveness and compatibility across devices during gameplay sessions and extended usage scenarios delivering smooth and consistent experience for all users always without lag issues
ReplyDelete
Replies
AnonymousJune 24, 2026 at 6:58 AM
5B494B4F
Ardahan Esçort
Giresun Esçort
Kütahya Esçort
Balıkesir Esçort
Muş Esçort
Kayseri Esçort
Osmaniye Esçort
Ağrı Esçort
Bartın Esçort
ReplyDelete
Replies
AnonymousJune 28, 2026 at 9:12 PM
5F391C41
Forum
Forum
Forum
Forum
Forum
Forum
Forum
Forum
Forum
ReplyDelete
Replies

Add comment

AI/ML, AWS, Azure, DevOps, GCP, M365, Microsoft Power Platform, RPA, SharePoint,

Wednesday, February 5, 2025

Azure AI services - Speech service

6 comments:

Featured Post

Generating a Microsoft Graph Bearer Token in Power Automate Desktop (App ID + Secret)

Popular posts

Search This Blog