You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

146 lines
5.1 KiB

  1. //----------------------------------------------------------------------------
  2. // Copyright (C) 2004-2021 by EMGU Corporation. All rights reserved.
  3. //----------------------------------------------------------------------------
  4. using System;
  5. using System.Collections;
  6. using System.Collections.Generic;
  7. using System.Text;
  8. using System.IO;
  9. using System.Drawing;
  10. using System.Runtime.InteropServices;
  11. using System.Threading.Tasks;
  12. using Emgu.CV;
  13. using Emgu.CV.CvEnum;
  14. using Emgu.CV.Dnn;
  15. using Emgu.CV.OCR;
  16. using Emgu.CV.Structure;
  17. using Emgu.CV.Models;
  18. using Emgu.Util;
  19. using System.Diagnostics;
  20. namespace Emgu.CV.Models
  21. {
  /// <summary>
  /// Tesseract Ocr model. Downloads the required language data on initialization,
  /// then recognizes text in an image and draws a rectangle around each recognized character.
  /// </summary>
  public class TesseractModel : DisposableObject, IProcessAndRenderModel
  {
      // Folder name handed to the download manager for the language data files.
      private readonly String _modelFolderName = "tessdata";

      // The OCR engine. Null until initialization succeeds, and again after Clear().
      private Tesseract _ocr;

      // Language and engine mode the current engine was created with (reported by ProcessAndRender).
      private String _lang;
      private OcrEngineMode _mode;

      /// <summary>
      /// Download the language data and create the Tesseract engine.
      /// Does nothing if an engine already exists; in that case <paramref name="lang"/> and <paramref name="mode"/> are ignored.
      /// If not all files could be downloaded, the engine is left uninitialized and no error is raised here.
      /// </summary>
      /// <param name="lang">The language whose data file is downloaded and passed to the engine</param>
      /// <param name="mode">The OCR engine mode passed to the engine</param>
      /// <param name="onDownloadProgressChanged">Call back method during download</param>
      /// <returns>Async task (an enumerator under Unity)</returns>
#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
      private IEnumerator
#else
      private async Task
#endif
      InitTesseract(String lang, OcrEngineMode mode, System.Net.DownloadProgressChangedEventHandler onDownloadProgressChanged = null)
      {
          if (_ocr == null)
          {
              FileDownloadManager manager = new FileDownloadManager();
              manager.AddFile(Emgu.CV.OCR.Tesseract.GetLangFileUrl(lang), _modelFolderName);
              manager.AddFile(Emgu.CV.OCR.Tesseract.GetLangFileUrl("osd"), _modelFolderName); //script orientation detection

              if (onDownloadProgressChanged != null)
              {
                  manager.OnDownloadProgressChanged += onDownloadProgressChanged;
              }

#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
              yield return manager.Download();
#else
              await manager.Download();
#endif

              if (manager.AllFilesDownloaded)
              {
                  _lang = lang;
                  _mode = mode;

                  // The engine is pointed at the directory that contains the first downloaded file.
                  FileInfo fi = new FileInfo(manager.Files[0].LocalFile);
                  _ocr = new Tesseract(fi.DirectoryName, _lang, _mode);
              }
          }
      }

      /// <summary>
      /// Clear and reset the model. Requires Init to be called again before calling ProcessAndRender.
      /// </summary>
      public void Clear()
      {
          if (_ocr != null)
          {
              _ocr.Dispose();
              _ocr = null;
          }
      }

      /// <summary>
      /// Release all the unmanaged memory associated to this tesseract OCR model.
      /// </summary>
      protected override void DisposeObject()
      {
          Clear();
      }

      /// <summary>
      /// Process the input image and render into the output image
      /// </summary>
      /// <param name="imageIn">The input image</param>
      /// <param name="imageOut">The output image, can be the same as imageIn, in which case we will render directly into the input image. This method does not copy imageIn into imageOut; it only draws the character rectangles.</param>
      /// <returns>The messages that we want to display.</returns>
      /// <exception cref="InvalidOperationException">
      /// The model has not been initialized (Init was not called, has not completed, did not download all files, or Clear was called),
      /// or the recognition step reported a failure.
      /// </exception>
      public string ProcessAndRender(IInputArray imageIn, IInputOutputArray imageOut)
      {
          // Fail with a clear message instead of a NullReferenceException when no engine is available.
          if (_ocr == null)
          {
              throw new InvalidOperationException("The Tesseract model is not initialized. Call Init and wait for it to complete before calling ProcessAndRender.");
          }

          _ocr.SetImage(imageIn);
          if (_ocr.Recognize() != 0)
          {
              throw new InvalidOperationException("Failed to recognize image");
          }

          String ocrResult = _ocr.GetUTF8Text();

          // Outline every recognized character on the output image.
          Tesseract.Character[] characters = _ocr.GetCharacters();
          foreach (Tesseract.Character c in characters)
          {
              CvInvoke.Rectangle(imageOut, c.Region, new MCvScalar(255, 0, 0));
          }

          return String.Format(
              "tesseract version {2}; lang: {0}; mode: {1}{3}Text Detected:{3}{4}",
              _lang,
              _mode.ToString(),
              Emgu.CV.OCR.Tesseract.VersionString,
              System.Environment.NewLine, ocrResult);
      }

      /// <summary>
      /// Initialize the tesseract ocr model using the "eng" language data and the TesseractOnly engine mode.
      /// </summary>
      /// <param name="onDownloadProgressChanged">Call back method during download</param>
      /// <param name="initOptions">Initialization options. None supported at the moment, any value passed will be ignored.</param>
      /// <returns>Async task</returns>
#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
      public IEnumerator
#else
      public async Task
#endif
      Init(System.Net.DownloadProgressChangedEventHandler onDownloadProgressChanged = null, Object initOptions = null)
      {
#if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL
          yield return
#else
          await
#endif
              InitTesseract("eng", OcrEngineMode.TesseractOnly, onDownloadProgressChanged);
      }
  }
  132. }