Wednesday, February 5, 2025

Azure AI services - Speech service

 Azure AI services - Speech service

Source: https://github.com/MicrosoftLearning/mslearn-ai-language

1. Create 'Speech service' in Azure - copy key and region

C# Code:

dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0
dotnet add package System.Windows.Extensions --version 4.6.0 

using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Collections.Generic;
using System.Text;

// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Translation;

using System.Media;

namespace speech_translation
{
    class Program
    {
        private static SpeechConfig speechConfig;
        private static SpeechTranslationConfig translationConfig;

        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                IConfigurationBuilder builder =                 new ConfigurationBuilder().AddJsonFile("appsettings.json");
                IConfigurationRoot configuration = builder.Build();
                string aiSvcKey = "1RBACOGNMJb"; //configuration["SpeechKey"];
                string aiSvcRegion = "eastus"; //configuration["SpeechRegion"];

                // Set console encoding to unicode
                Console.InputEncoding = Encoding.Unicode;
                Console.OutputEncoding = Encoding.Unicode;

                // Configure translation
                translationConfig = SpeechTranslationConfig.FromSubscription(aiSvcKey,                 aiSvcRegion);
                translationConfig.SpeechRecognitionLanguage = "en-US";
                translationConfig.AddTargetLanguage("fr");
                translationConfig.AddTargetLanguage("es");
                translationConfig.AddTargetLanguage("hi");
                Console.WriteLine("Ready to translate from " +                 translationConfig.SpeechRecognitionLanguage);

                // Configure speech
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);

                string targetLanguage = "";
                while (targetLanguage != "quit")
                {
                    Console.WriteLine("\nEnter a target language\n fr = French\n es =                     Spanish\n hi = Hindi\n Enter anything else to stop\n");
                    targetLanguage = Console.ReadLine().ToLower();
                    if (translationConfig.TargetLanguages.Contains(targetLanguage))
                    {
                        await Translate(targetLanguage);
                    }
                    else
                    {
                        targetLanguage = "quit";
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        static async Task Translate(string targetLanguage)
        {
            string translation = "";

            // Translate speech
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using TranslationRecognizer translator = new TranslationRecognizer(translationConfig,             audioConfig);
            Console.WriteLine("Speak now...");
            TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            Console.WriteLine($"Translating '{result.Text}'");
            translation = result.Translations[targetLanguage];
            Console.OutputEncoding = Encoding.UTF8;
            Console.WriteLine(translation);

            // Translate speech
            // string audioFile = "station.wav";
            // SoundPlayer wavPlayer = new SoundPlayer(audioFile);
            // wavPlayer.Play();
            // using AudioConfig audioConfig = AudioConfig.FromWavFileInput(audioFile);
            // using TranslationRecognizer translator = new TranslationRecognizer(translationConfig,             audioConfig);
            // Console.WriteLine("Getting speech from file...");
            // TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            // Console.WriteLine($"Translating '{result.Text}'");
            // translation = result.Translations[targetLanguage];
            // Console.OutputEncoding = Encoding.UTF8;
            // Console.WriteLine(translation);

            // Synthesize translation
            var voices = new Dictionary<string, string>
            {
                ["fr"] = "fr-FR-HenriNeural",
                ["es"] = "es-ES-ElviraNeural",
                ["hi"] = "hi-IN-MadhurNeural"
            };
            speechConfig.SpeechSynthesisVoiceName = voices[targetLanguage];
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);
            SpeechSynthesisResult speak = await speechSynthesizer.SpeakTextAsync(translation);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
            }
        }
    }
}



OutPut:
































Python Code:

pip install azure-cognitiveservices-speech==1.30.0
pip install playsound==1.3.0

from dotenv import load_dotenv
from datetime import datetime
import os

# Import namespaces
import azure.cognitiveservices.speech as speech_sdk


def main():
    try:
        global speech_config
        global translation_config

        # Get Configuration Settings
        load_dotenv()
        ai_key = '1RBACOGNMJb' #os.getenv('SPEECH_KEY')
        ai_region = 'eastus' # os.getenv('SPEECH_REGION')

        # Configure translation
        translation_config = speech_sdk.translation.SpeechTranslationConfig(subscription=ai_key,         region=ai_region)
        translation_config.speech_recognition_language = 'en-US'
        translation_config.add_target_language('fr')
        translation_config.add_target_language('es')
        translation_config.add_target_language('hi')
        print('Ready to translate from', translation_config.speech_recognition_language)


        # Configure speech
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)


        # Get user input
        targetLanguage = ''
        while targetLanguage != 'quit':
            targetLanguage = input('\nEnter a target language\n fr = French\n es = Spanish\n hi =             Hindi\n Enter anything else to stop\n').lower()
            if targetLanguage in translation_config.target_languages:
                Translate(targetLanguage)
            else:
                targetLanguage = 'quit'
               

    except Exception as ex:
        print(ex)

def Translate(targetLanguage):
    translation = ''

    # Translate speech
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(translation_config=translation_config,     audio_config=audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()
    if result.reason == speech_sdk.ResultReason.TranslatedSpeech:
        print('Translating "{}"'.format(result.text))
    for language, translation in result.translations.items():
        print('Translation in {}: {}'.format(language, translation))
    else:
        print("No speech could be recognized or translation failed.")


    # Synthesize translation
    voices = {
        "fr": "fr-FR-HenriNeural",
        "es": "es-ES-ElviraNeural",
        "hi": "hi-IN-MadhurNeural"
    }
 
    # Assuming `targetLanguage` and `translation` are defined
    targetLanguage = "fr"  # Replace this with the actual target language
    translation = "Bonjour tout le monde"  # Replace this with the actual translation
 
    # Set the speech synthesis voice name based on the target language
    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
 
    # Initialize the speech synthesizer
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
 
    # Synthesize the translated text to speech
    speak = speech_synthesizer.speak_text_async(translation).get()
 
    # Check if the synthesis was successful
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)
    else:
        print("Speech synthesis completed successfully.")



if __name__ == "__main__":
    main()


OutPut:





























Python:
from dotenv import load_dotenv
from datetime import datetime
import os

# Import namespaces
import azure.cognitiveservices.speech as speech_sdk

def main():
    try:
        global speech_config
        global translation_config

        # Get Configuration Settings
        load_dotenv()
        ai_key = 'BI8xxusRRypyJeQHHhOe9ZblK3w3AAAYACOGawO8' # os.getenv('SPEECH_KEY')
        ai_region = 'eastus' # os.getenv('SPEECH_REGION')

        # Configure translation
        translation_config = speech_sdk.translation.SpeechTranslationConfig         (ai_key, ai_region)
        translation_config.speech_recognition_language = 'en-US'
        translation_config.add_target_language('fr')
        translation_config.add_target_language('es')
        translation_config.add_target_language('hi')
        print('Ready to translate from',         translation_config.speech_recognition_language)

        # Configure speech
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)

        # Get user input
        targetLanguage = ''
        while targetLanguage != 'quit':
            targetLanguage = input('\nEnter a target language\n fr = French\n es =             Spanish\n hi = Hindi\n Enter anything else to stop\n').lower()
            if targetLanguage in translation_config.target_languages:
                Translate(targetLanguage)
            else:
                targetLanguage = 'quit'

    except Exception as ex:
        print(ex)

def Translate(targetLanguage):
    translation = ''

    # Translate speech
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(translation_config,     audio_config=audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()
    print('Translating "{}"'.format(result.text))
    translation = result.translations[targetLanguage]
    print(translation)

    # Synthesize translation
    voices = {
        "fr": "fr-FR-HenriNeural",
        "es": "es-ES-ElviraNeural",
        "hi": "hi-IN-MadhurNeural"
    }

    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
    speak = speech_synthesizer.speak_text_async(translation).get()
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)

if __name__ == "__main__":
    main()

Script: 
curl -X POST "https://sreemultiserviceaccount1.cognitiveservices.azure.com/language/:analyze-text?api-version=2023-04-01" -H "Content-Type: application/json" -H "Ocp-Apim-Subscription-Key: 2D9XtWQ0YOGFMV1" --data-ascii "{'analysisInput':{'documents':[{'id':1,'text':'hola'}]}, 'kind': 'LanguageDetection'}"

OutPut: 
{"kind":"LanguageDetectionResults",
"results":{"documents":[{"id":"1","warnings":[],
"detectedLanguage":{"name":"Spanish","iso6391Name":"es","confidenceScore":1.0}}],"errors":[],"modelVersion":"2024-04-01"}}

4 comments:

  1. Celebrating the outstanding reliability and speed winsetupfromusb stands out as a powerful tool for creating bootable USB drives enabling smooth installation of various operating systems while maintaining stability efficiency and user friendly workflow that simplifies complex setup processes for everyone

    ReplyDelete
  2. Cheerful simplicity in benchmarking crystaldiskmark offers a smooth experience for testing storage device performance. Interface feels minimal, results remain precise, and users can quickly understand system speed differences clearly.

    ReplyDelete
  3. High performance driver tool enhancing controller dshidmini enables accurate input mapping for gamers while maintaining stability responsiveness and compatibility across devices during gameplay sessions and extended usage scenarios delivering smooth and consistent experience for all users always without lag issues

    ReplyDelete

Featured Post

Create a Dataverse Table With Every Common Column Type Using Power Automate

Create a Dataverse Table With Every Common Column Type Using Power Automate Create a Dataverse Table With Every Common Column Typ...

Popular posts