
Wednesday, February 5, 2025

Azure AI services - Speech service


Source: https://github.com/MicrosoftLearning/mslearn-ai-language

1. Create a 'Speech service' resource in the Azure portal, then copy its key and region (or use the Azure CLI, as sketched below).
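
If you prefer the command line, a minimal Azure CLI sketch of the same step (the resource and resource group names are placeholders; F0 is the free tier):

az cognitiveservices account create --name my-speech-resource --resource-group my-rg --kind SpeechServices --sku F0 --location eastus --yes
az cognitiveservices account keys list --name my-speech-resource --resource-group my-rg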

C# Code:

dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0
dotnet add package System.Windows.Extensions --version 4.6.0 

using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Collections.Generic;
using System.Text;

// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Translation;

using System.Media;

namespace speech_translation
{
    class Program
    {
        private static SpeechConfig speechConfig;
        private static SpeechTranslationConfig translationConfig;

        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                IConfigurationRoot configuration = builder.Build();
                string aiSvcKey = configuration["SpeechKey"];
                string aiSvcRegion = configuration["SpeechRegion"];

                // Set console encoding to unicode
                Console.InputEncoding = Encoding.Unicode;
                Console.OutputEncoding = Encoding.Unicode;

                // Configure translation
                translationConfig = SpeechTranslationConfig.FromSubscription(aiSvcKey, aiSvcRegion);
                translationConfig.SpeechRecognitionLanguage = "en-US";
                translationConfig.AddTargetLanguage("fr");
                translationConfig.AddTargetLanguage("es");
                translationConfig.AddTargetLanguage("hi");
                Console.WriteLine("Ready to translate from " +                 translationConfig.SpeechRecognitionLanguage);

                // Configure speech
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);

                string targetLanguage = "";
                while (targetLanguage != "quit")
                {
                    Console.WriteLine("\nEnter a target language\n fr = French\n es =                     Spanish\n hi = Hindi\n Enter anything else to stop\n");
                    targetLanguage = Console.ReadLine().ToLower();
                    if (translationConfig.TargetLanguages.Contains(targetLanguage))
                    {
                        await Translate(targetLanguage);
                    }
                    else
                    {
                        targetLanguage = "quit";
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        static async Task Translate(string targetLanguage)
        {
            string translation = "";

            // Translate speech
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            Console.WriteLine("Speak now...");
            TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            Console.WriteLine($"Translating '{result.Text}'");
            translation = result.Translations[targetLanguage];
            Console.OutputEncoding = Encoding.UTF8;
            Console.WriteLine(translation);

            // Alternative: translate speech from a WAV file instead of the microphone
            // string audioFile = "station.wav";
            // SoundPlayer wavPlayer = new SoundPlayer(audioFile);
            // wavPlayer.Play();
            // using AudioConfig audioConfig = AudioConfig.FromWavFileInput(audioFile);
            // using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            // Console.WriteLine("Getting speech from file...");
            // TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            // Console.WriteLine($"Translating '{result.Text}'");
            // translation = result.Translations[targetLanguage];
            // Console.OutputEncoding = Encoding.UTF8;
            // Console.WriteLine(translation);

            // Synthesize translation
            var voices = new Dictionary<string, string>
            {
                ["fr"] = "fr-FR-HenriNeural",
                ["es"] = "es-ES-ElviraNeural",
                ["hi"] = "hi-IN-MadhurNeural"
            };
            speechConfig.SpeechSynthesisVoiceName = voices[targetLanguage];
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);
            SpeechSynthesisResult speak = await speechSynthesizer.SpeakTextAsync(translation);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
            }
        }
    }
}
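
The configuration reads above expect an appsettings.json file next to the project; a minimal example (both values are placeholders for your own key and region):

{
    "SpeechKey": "<your-speech-key>",
    "SpeechRegion": "eastus"
}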



Output: (console output screenshot omitted)

Python Code:

pip install azure-cognitiveservices-speech==1.30.0
pip install playsound==1.3.0
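
The Python version loads its settings from a .env file via load_dotenv(); a minimal example (both values are placeholders):

SPEECH_KEY=<your-speech-key>
SPEECH_REGION=eastus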

from dotenv import load_dotenv
from datetime import datetime
import os

# Import namespaces
import azure.cognitiveservices.speech as speech_sdk


def main():
    try:
        global speech_config
        global translation_config

        # Get Configuration Settings
        load_dotenv()
        ai_key = os.getenv('SPEECH_KEY')
        ai_region = os.getenv('SPEECH_REGION')

        # Configure translation
        translation_config = speech_sdk.translation.SpeechTranslationConfig(subscription=ai_key, region=ai_region)
        translation_config.speech_recognition_language = 'en-US'
        translation_config.add_target_language('fr')
        translation_config.add_target_language('es')
        translation_config.add_target_language('hi')
        print('Ready to translate from', translation_config.speech_recognition_language)


        # Configure speech
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)


        # Get user input
        targetLanguage = ''
        while targetLanguage != 'quit':
            targetLanguage = input('\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n').lower()
            if targetLanguage in translation_config.target_languages:
                Translate(targetLanguage)
            else:
                targetLanguage = 'quit'
               

    except Exception as ex:
        print(ex)

def Translate(targetLanguage):
    translation = ''

    # Translate speech
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(translation_config=translation_config, audio_config=audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()
    if result.reason == speech_sdk.ResultReason.TranslatedSpeech:
        print('Translating "{}"'.format(result.text))
        translation = result.translations[targetLanguage]
        print(translation)
    else:
        print("No speech could be recognized or translation failed.")
        return


    # Synthesize translation
    voices = {
        "fr": "fr-FR-HenriNeural",
        "es": "es-ES-ElviraNeural",
        "hi": "hi-IN-MadhurNeural"
    }
 
    # Set the speech synthesis voice name based on the target language
    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
 
    # Initialize the speech synthesizer
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
 
    # Synthesize the translated text to speech
    speak = speech_synthesizer.speak_text_async(translation).get()
 
    # Check if the synthesis was successful
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)
    else:
        print("Speech synthesis completed successfully.")



if __name__ == "__main__":
    main()
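
The C# listing above includes a commented-out variant that translates from a WAV file instead of the microphone. A minimal Python sketch of the same idea, assuming a local station.wav (as in the C# comments) and the playsound package installed earlier:

from playsound import playsound

def TranslateFromFile(targetLanguage):
    audio_file = 'station.wav'
    playsound(audio_file)  # play the source audio so you can hear what is being translated
    audio_config = speech_sdk.AudioConfig(filename=audio_file)
    translator = speech_sdk.translation.TranslationRecognizer(translation_config, audio_config=audio_config)
    print("Getting speech from file...")
    result = translator.recognize_once_async().get()
    print('Translating "{}"'.format(result.text))
    print(result.translations[targetLanguage])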


Output: (console output screenshot omitted)

Script (Language service - language detection via REST):
curl -X POST "https://sreemultiserviceaccount1.cognitiveservices.azure.com/language/:analyze-text?api-version=2023-04-01" -H "Content-Type: application/json" -H "Ocp-Apim-Subscription-Key: <your-key>" --data-ascii "{'analysisInput':{'documents':[{'id':1,'text':'hola'}]}, 'kind': 'LanguageDetection'}"

Output:
{"kind":"LanguageDetectionResults","results":{"documents":[{"id":"1","warnings":[],"detectedLanguage":{"name":"Spanish","iso6391Name":"es","confidenceScore":1.0}}],"errors":[],"modelVersion":"2024-04-01"}}
