AI/ML, AWS, Azure, DevOps, GCP, M365, Microsoft Power Platform, SharePoint: Azure AI services

Monday, January 27, 2025

Azure AI services - Read Text in Images

Azure AI services - Read Text in Images:

Source:
https://github.com/MicrosoftLearning/AI-102-AIEngineer
1. Prerequisite: an Azure AI services multi-service account (Azure portal: Azure AI services | Azure AI services multi-service account)

C# Code:

using Microsoft.Azure.CognitiveServices.Vision.ComputerVision;
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision.Models;
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

// dotnet add package Microsoft.Azure.CognitiveServices.Vision.ComputerVision --version 6.0.0

namespace read_text
{
    /// <summary>
    /// Console sample that submits a local image or PDF to the Read API through
    /// <see cref="ComputerVisionClient"/> and prints each recognized line of text.
    /// </summary>
    class Program
    {
        // Endpoint used when the COG_SERVICE_ENDPOINT environment variable is not set.
        // The endpoint is not a secret, so it is safe to keep as a default.
        private const string DefaultEndpoint = "https://multiserviceaccount1.cognitiveservices.azure.com/";

        // The operation ID is parsed as a GUID from the last 36 characters of the
        // Operation-Location value returned by the service.
        private const int OperationIdLength = 36;

        // Delay between two status polls, in milliseconds.
        private const int PollDelayMilliseconds = 1000;

        private static ComputerVisionClient cvClient;

        /// <summary>
        /// Creates the Computer Vision client, shows the menu, and runs the Read API
        /// on the sample file that matches the user's choice.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <remarks>
        /// The key is read from the COG_SERVICE_KEY environment variable so that no
        /// secret is stored in source. Any exception is caught and its message is
        /// written to the console.
        /// </remarks>
        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from the environment
                string cogSvcEndpoint = Environment.GetEnvironmentVariable("COG_SERVICE_ENDPOINT");
                if (string.IsNullOrWhiteSpace(cogSvcEndpoint))
                {
                    cogSvcEndpoint = DefaultEndpoint;
                }

                string cogSvcKey = Environment.GetEnvironmentVariable("COG_SERVICE_KEY");
                if (string.IsNullOrWhiteSpace(cogSvcKey))
                {
                    throw new InvalidOperationException(
                        "Set the COG_SERVICE_KEY environment variable to your Azure AI services key.");
                }

                ApiKeyServiceClientCredentials credentials = new ApiKeyServiceClientCredentials(cogSvcKey);
                cvClient = new ComputerVisionClient(credentials)
                {
                    Endpoint = cogSvcEndpoint
                };

                // Menu for text reading functions
                Console.WriteLine("1: Use Read API for image\n2: Use Read API for document\n3: Read handwriting\nAny other key to quit");
                Console.WriteLine("Enter a number:");
                string command = Console.ReadLine();

                // Map the menu choice to a sample file; any other input quits.
                string imageFile = null;
                switch (command)
                {
                    case "1":
                        imageFile = "images/Lincoln.jpg";
                        break;
                    case "2":
                        imageFile = "images/Rome.pdf";
                        break;
                    case "3":
                        imageFile = "images/Note.jpg";
                        break;
                    default:
                        break;
                }

                if (imageFile != null)
                {
                    await GetTextRead(imageFile);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Sends the file to the Read API, polls until the operation is no longer
        /// running or waiting to start, and prints the recognized text line by line
        /// when the operation succeeded.
        /// </summary>
        /// <param name="imageFile">Path of the image or PDF file to read.</param>
        static async Task GetTextRead(string imageFile)
        {
            Console.WriteLine($"Reading text in {imageFile}\n");

            // Use Read API to read text in image. The stream is only needed while the
            // file is being submitted, so it is closed before polling starts.
            string operationLocation;
            using (var imageData = File.OpenRead(imageFile))
            {
                var readOp = await cvClient.ReadInStreamAsync(imageData);
                operationLocation = readOp.OperationLocation;
            }

            // Get the async operation ID so we can check for the results.
            // Parsed once here instead of on every poll.
            Guid operationId = Guid.Parse(
                operationLocation.Substring(operationLocation.Length - OperationIdLength));

            // Wait for the asynchronous operation to complete.
            // Task.Delay yields the thread while waiting; Thread.Sleep would block it.
            ReadOperationResult results;
            do
            {
                await Task.Delay(PollDelayMilliseconds);
                results = await cvClient.GetReadResultAsync(operationId);
            }
            while (results.Status == OperationStatusCodes.Running ||
                   results.Status == OperationStatusCodes.NotStarted);

            // If the operation was successful, process the text line by line
            if (results.Status == OperationStatusCodes.Succeeded)
            {
                foreach (ReadResult page in results.AnalyzeResult.ReadResults)
                {
                    foreach (Line line in page.Lines)
                    {
                        Console.WriteLine(line.Text);

                        // Uncomment the following line if you'd like to see the bounding box
                        //Console.WriteLine(line.BoundingBox);
                    }
                }
            }
        }
    }
}

Python Code:

# pip install python-dotenv
# pip install pillow
# pip install matplotlib
# pip install azure-cognitiveservices-vision-computervision==0.7.0

from dotenv import load_dotenv
import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt

# Import namespaces
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials

def main():
    """Show the menu and run the Read API on the sample file the user picks.

    The endpoint and key are read with os.getenv after load_dotenv() has been
    called, so the key is no longer stored in source. The endpoint falls back
    to the sample's original URL when COG_SERVICE_ENDPOINT is not set.

    Any exception (including a missing COG_SERVICE_KEY) is caught and printed.
    """
    global cv_client
    try:
        # Get Configuration Settings
        load_dotenv()
        cog_endpoint = (os.getenv('COG_SERVICE_ENDPOINT')
                        or "https://multiserviceaccount1.cognitiveservices.azure.com/")
        cog_key = os.getenv('COG_SERVICE_KEY')
        if not cog_key:
            raise ValueError('Set COG_SERVICE_KEY in the environment or in a .env file.')

        # Authenticate Azure AI Vision client
        credential = CognitiveServicesCredentials(cog_key)
        cv_client = ComputerVisionClient(cog_endpoint, credential)

        # Menu for text reading functions
        print('1: Use Read API for image\n2: Use Read API for document\n'
              '3: Read handwriting\nAny other key to quit')
        command = input('Enter a number:')

        # Map each menu choice to its sample file; any other input quits.
        sample_files = {
            '1': 'Lincoln.jpg',
            '2': 'Rome.pdf',
            '3': 'Note.jpg',
        }
        file_name = sample_files.get(command)
        if file_name is not None:
            GetTextRead(os.path.join('images', file_name))
    except Exception as ex:
        print(ex)

def GetTextRead(image_file):
    """Submit a file to the Read API and print the text it contains.

    Uses the module-level ``cv_client``. The file is uploaded, the operation
    is polled once per second while it is running or not yet started, and
    each recognized line is printed if the operation succeeded.

    Args:
        image_file: Path of the image or PDF file to read.
    """
    print('Reading text in {}\n'.format(image_file))

    # Upload the file; raw=True is passed so the response headers can be read.
    with open(image_file, mode="rb") as stream:
        submission = cv_client.read_in_stream(stream, raw=True)

    # The operation ID is the last path segment of the Operation-Location header.
    op_id = submission.headers["Operation-Location"].rsplit("/", 1)[-1]

    # Poll until the operation is neither running nor waiting to start.
    pending = (OperationStatusCodes.running, OperationStatusCodes.not_started)
    outcome = cv_client.get_read_result(op_id)
    while outcome.status in pending:
        time.sleep(1)
        outcome = cv_client.get_read_result(op_id)

    # Nothing is printed unless the operation succeeded.
    if outcome.status != OperationStatusCodes.succeeded:
        return

    for page in outcome.analyze_result.read_results:
        for text_line in page.lines:
            print(text_line.text)
            # Uncomment the following line if you'd like to see the bounding box
            # print(text_line.bounding_box)

# Run the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()