Monday, January 27, 2025

Azure AI services - Read Text in Images

Azure AI services - Read Text in Images:

Source:
https://github.com/MicrosoftLearning/AI-102-AIEngineer
1. Azure AI services | Azure AI services multi-service account

C# Code:
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision;
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision.Models;
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

// dotnet add package Microsoft.Azure.CognitiveServices.Vision.ComputerVision --version 6.0.0

namespace read_text
{
    class Program
    {
        private static ComputerVisionClient cvClient;
        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                // IConfigurationBuilder builder =                 new ConfigurationBuilder().AddJsonFile("appsettings.json");
                // IConfigurationRoot configuration = builder.Build();
                string cogSvcEndpoint = "https://multiserviceaccount1.cognitiveservices.azure.com/";
               // configuration["CognitiveServicesEndpoint"];
                string cogSvcKey = "AjShjnYv3s56Ne4keUlZIqXJ799BAACYeBjFXJ3w3AAAEACOGAIif";                 // configuration["CognitiveServiceKey"];

                ApiKeyServiceClientCredentials credentials =                 new ApiKeyServiceClientCredentials(cogSvcKey);
                cvClient = new ComputerVisionClient(credentials)
                {
                    Endpoint = cogSvcEndpoint
                };

                // Menu for text reading functions
                Console.WriteLine("1: Use Read API for image\n2: Use Read API for document\n3:                 Read handwriting\nAny other key to quit");
                Console.WriteLine("Enter a number:");
                string command = Console.ReadLine();
                string imageFile;
                switch (command)
                {
                    case "1":
                        imageFile = "images/Lincoln.jpg";
                        await GetTextRead(imageFile);
                        break;
                    case "2":
                        imageFile = "images/Rome.pdf";
                        await GetTextRead(imageFile);
                        break;
                    case "3":
                        imageFile = "images/Note.jpg";
                        await GetTextRead(imageFile);
                        break;
                    default:
                        break;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        static async Task GetTextRead(string imageFile)
        {
            Console.WriteLine($"Reading text in {imageFile}\n");
            // Use Read API to read text in image
            using (var imageData = File.OpenRead(imageFile))
            {
                var readOp = await cvClient.ReadInStreamAsync(imageData);

                // Get the async operation ID so we can check for the results
                string operationLocation = readOp.OperationLocation;
                string operationId = operationLocation.Substring(operationLocation.Length - 36);

                // Wait for the asynchronous operation to complete
                ReadOperationResult results;
                do
                {
                    Thread.Sleep(1000);
                    results = await cvClient.GetReadResultAsync(Guid.Parse(operationId));
                }
                while ((results.Status == OperationStatusCodes.Running ||
                        results.Status == OperationStatusCodes.NotStarted));

                // If the operation was successfully, process the text line by line
                if (results.Status == OperationStatusCodes.Succeeded)
                {
                    var textUrlFileResults = results.AnalyzeResult.ReadResults;
                    foreach (ReadResult page in textUrlFileResults)
                    {
                        foreach (Line line in page.Lines)
                        {
                            Console.WriteLine(line.Text);

                            // Uncomment the following line if you'd like to see the bounding box
                            //Console.WriteLine(line.BoundingBox);
                        }
                    }
                }
            }


        }
    }
}

Python Code:

# pip install python-dotenv
# pip install pillow
# pip install matplotlib
# pip install azure-cognitiveservices-vision-computervision==0.7.0

from dotenv import load_dotenv
import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt

# Import namespaces
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials

def main():
    """Create the Azure AI Vision client, show the menu and read the chosen file.

    Settings come from the environment (after ``load_dotenv()``):
    ``COG_SERVICE_KEY`` is required; ``COG_SERVICE_ENDPOINT`` is optional and
    falls back to the sample endpoint. The client is stored in the module-level
    ``cv_client`` used by ``GetTextRead``. Any exception is caught and printed.
    """
    global cv_client
    try:
        # Get Configuration Settings
        load_dotenv()
        cog_endpoint = (os.getenv('COG_SERVICE_ENDPOINT')
                        or "https://multiserviceaccount1.cognitiveservices.azure.com/")
        # The key is a secret: read it from the environment, never from source.
        cog_key = os.getenv('COG_SERVICE_KEY')
        if not cog_key:
            raise ValueError('Set COG_SERVICE_KEY (environment variable or .env file) '
                             'to your Azure AI services key.')

        # Authenticate Azure AI Vision client
        credential = CognitiveServicesCredentials(cog_key)
        cv_client = ComputerVisionClient(cog_endpoint, credential)

        # Menu for text reading functions
        print('1: Use Read API for image\n2: Use Read API for document\n3: Read handwriting\nAny other key to quit')
        command = input('Enter a number:')

        # Menu choice -> sample file in the images folder; any other input quits.
        sample_files = {'1': 'Lincoln.jpg', '2': 'Rome.pdf', '3': 'Note.jpg'}
        if command in sample_files:
            image_file = os.path.join('images', sample_files[command])
            GetTextRead(image_file)
    except Exception as ex:
        print(ex)

def GetTextRead(image_file):
    """Send ``image_file`` to the Read API and print the recognized text.

    Uploads the file with the module-level ``cv_client``, polls once per second
    until the operation is no longer ``running``/``not_started``, then prints
    each line of each page. If the operation ends in any other state than
    ``succeeded``, the final status is printed instead.

    Args:
        image_file: Path of the image or PDF file to read.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use Read API to read text in image
    with open(image_file, mode="rb") as image_data:
        read_op = cv_client.read_in_stream(image_data, raw=True)

    # Get the async operation ID (last URL segment) so we can check for the results
    operation_location = read_op.headers["Operation-Location"]
    operation_id = operation_location.rstrip("/").split("/")[-1]

    # Wait for the asynchronous operation to complete
    pending = (OperationStatusCodes.running, OperationStatusCodes.not_started)
    while True:
        read_results = cv_client.get_read_result(operation_id)
        if read_results.status not in pending:
            break
        time.sleep(1)

    # Report a non-successful outcome instead of ending silently
    if read_results.status != OperationStatusCodes.succeeded:
        print('Read operation did not succeed (status: {})'.format(read_results.status))
        return

    # The operation was successful: process the text line by line
    for page in read_results.analyze_result.read_results:
        for line in page.lines:
            print(line.text)
            # Uncomment the following line if you'd like to see the bounding box
            # print(line.bounding_box)

# Run the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Input:


Output:


Input:
Save as PDF file.


Output:



Input:


Output:



2 comments:

Featured Post

Create SharePoint Folder Structure in Destination (Only If Not Exists)

Why This Script Is Safe You can run it multiple times It will not create duplicate folders It will only create missing folders S...

Popular posts