mirror of https://github.com/emgucv/emgucv.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
146 lines
5.1 KiB
146 lines
5.1 KiB
//----------------------------------------------------------------------------
|
|
// Copyright (C) 2004-2021 by EMGU Corporation. All rights reserved.
|
|
//----------------------------------------------------------------------------
|
|
|
|
using System;
|
|
using System.Collections;
|
|
using System.Collections.Generic;
|
|
using System.Text;
|
|
using System.IO;
|
|
using System.Drawing;
|
|
using System.Runtime.InteropServices;
|
|
using System.Threading.Tasks;
|
|
using Emgu.CV;
|
|
using Emgu.CV.CvEnum;
|
|
using Emgu.CV.Dnn;
|
|
using Emgu.CV.OCR;
|
|
using Emgu.CV.Structure;
|
|
using Emgu.CV.Models;
|
|
using Emgu.Util;
|
|
using System.Diagnostics;
|
|
|
|
namespace Emgu.CV.Models
|
|
{
|
|
/// <summary>
/// Tesseract Ocr model. Downloads the required language data on initialization,
/// runs text recognition on an image and outlines every recognized character.
/// </summary>
public class TesseractModel : DisposableObject, IProcessAndRenderModel
{
    // Name of the folder passed to the download manager for the language files.
    private String _modelFolderName = "tessdata";

    // The OCR engine. Null until InitTesseract succeeds, and null again after Clear().
    private Tesseract _ocr;

    // Language and engine mode the current engine was created with; reported by ProcessAndRender.
    private String _lang;
    private OcrEngineMode _mode;

    /// <summary>
    /// Download the language data for <paramref name="lang"/> (plus the "osd" data) and create the Tesseract engine.
    /// Does nothing if the engine has already been created.
    /// If not all files could be downloaded, the engine is not created and the model stays uninitialized.
    /// </summary>
    /// <param name="lang">The language passed to the Tesseract engine, e.g. "eng"</param>
    /// <param name="mode">The OCR engine mode</param>
    /// <param name="onDownloadProgressChanged">Call back method during download</param>
    /// <returns>Async task (an enumerator when compiled for Unity)</returns>
#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
    private IEnumerator
#else
    private async Task
#endif
    InitTesseract(String lang, OcrEngineMode mode, System.Net.DownloadProgressChangedEventHandler onDownloadProgressChanged = null)
    {
        if (_ocr == null)
        {
            FileDownloadManager manager = new FileDownloadManager();
            manager.AddFile(Emgu.CV.OCR.Tesseract.GetLangFileUrl(lang), _modelFolderName);
            manager.AddFile(Emgu.CV.OCR.Tesseract.GetLangFileUrl("osd"), _modelFolderName); //script orientation detection

            if (onDownloadProgressChanged != null)
                manager.OnDownloadProgressChanged += onDownloadProgressChanged;

#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
            yield return manager.Download();
#else
            await manager.Download();
#endif

            if (manager.AllFilesDownloaded)
            {
                _lang = lang;
                _mode = mode;

                // The engine is pointed at the directory that contains the first downloaded file.
                FileInfo fi = new FileInfo(manager.Files[0].LocalFile);
                _ocr = new Tesseract(fi.DirectoryName, _lang, _mode);
            }
        }
    }

    /// <summary>
    /// Clear and reset the model. Requires the Init function to be called again before calling ProcessAndRender.
    /// </summary>
    public void Clear()
    {
        if (_ocr != null)
        {
            _ocr.Dispose();
            _ocr = null;
        }
    }

    /// <summary>
    /// Release all the unmanaged memory associated to this tesseract OCR model.
    /// </summary>
    protected override void DisposeObject()
    {
        Clear();
    }

    /// <summary>
    /// Process the input image and render into the output image
    /// </summary>
    /// <param name="imageIn">The input image</param>
    /// <param name="imageOut">The output image, can be the same as imageIn, in which case we will render directly into the input image</param>
    /// <returns>The messages that we want to display.</returns>
    /// <exception cref="InvalidOperationException">The model has not been (successfully) initialized, or it has been cleared.</exception>
    /// <exception cref="Exception">Tesseract reported a failure while recognizing the image.</exception>
    public string ProcessAndRender(IInputArray imageIn, IInputOutputArray imageOut)
    {
        // _ocr is null if Init was never completed, if the language files failed to download,
        // or after Clear(). Fail with an explicit message instead of a NullReferenceException.
        if (_ocr == null)
            throw new InvalidOperationException("The Tesseract model is not initialized. Call Init and wait for it to complete before calling ProcessAndRender.");

        _ocr.SetImage(imageIn);
        if (_ocr.Recognize() != 0)
            throw new Exception("Failed to recognize image");
        String ocrResult = _ocr.GetUTF8Text();

        // NOTE: imageIn is not copied into imageOut here; the character rectangles
        // are drawn on imageOut exactly as it was supplied by the caller.
        Tesseract.Character[] characters = _ocr.GetCharacters();
        foreach (Tesseract.Character c in characters)
        {
            CvInvoke.Rectangle(imageOut, c.Region, new MCvScalar(255, 0, 0));
        }

        return String.Format(
            "tesseract version {2}; lang: {0}; mode: {1}{3}Text Detected:{3}{4}",
            _lang,
            _mode.ToString(),
            Emgu.CV.OCR.Tesseract.VersionString,
            System.Environment.NewLine, ocrResult);
    }

    /// <summary>
    /// Initialize the tesseract ocr model using the "eng" language and the TesseractOnly engine mode.
    /// </summary>
    /// <param name="onDownloadProgressChanged">Call back method during download</param>
    /// <param name="initOptions">Initialization options. None supported at the moment, any value passed will be ignored.</param>
    /// <returns>Async task</returns>
#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
    public IEnumerator
#else
    public async Task
#endif
    Init(System.Net.DownloadProgressChangedEventHandler onDownloadProgressChanged = null, Object initOptions = null)
    {
#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
        yield return
#else
        await
#endif
        InitTesseract("eng", OcrEngineMode.TesseractOnly, onDownloadProgressChanged);
    }
}
|
|
}
|