//---------------------------------------------------------------------------- // Copyright (C) 2004-2024 by EMGU Corporation. All rights reserved. //---------------------------------------------------------------------------- using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Drawing; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using Emgu.CV; using Emgu.CV.CvEnum; using Emgu.CV.Dnn; using Emgu.CV.Structure; using Emgu.CV.Util; using Emgu.Util; namespace Emgu.CV.Models { /// /// DNN Scene text detector /// public class SceneTextDetector : DisposableObject, IProcessAndRenderModel { /// /// The rendering method /// public RenderType RenderMethod { get { return RenderType.Update; } } /// /// Create a new SceneTextDetector /// /// The subfolder name where the model will be saved to. public SceneTextDetector(String modelFolderName = null) { if (modelFolderName != null) _modelFolderName = modelFolderName; } private String _modelFolderName = Path.Combine("emgu", "scene_text_detector"); private TextDetectionModel_DB _textDetector = null; private TextRecognitionModel _ocr = null; private FreetypeNotoSansCJK _freetype = null; /// /// Return true if the model is initialized /// public bool Initialized { get { if (_textDetector == null) return false; if (_ocr == null) return false; if (_freetype == null) return false; return true; } } /// /// Download and initialize the vehicle detector, the license plate detector and OCR. /// /// Callback when download progress has been changed /// Initialization options. None supported at the moment, any value passed will be ignored. /// Async task #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL public IEnumerator Init( FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null, Object initOptions = null) #else public async Task Init( FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null, Object initOptions = null) #endif { #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL yield return InitTextDetector(onDownloadProgressChanged); yield return InitTextRecognizer(onDownloadProgressChanged); yield return InitFreetype(onDownloadProgressChanged); #else await InitTextDetector(onDownloadProgressChanged); await InitTextRecognizer(onDownloadProgressChanged); await InitFreetype(onDownloadProgressChanged); #endif } #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL private IEnumerator InitFreetype(FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null) #else private async Task InitFreetype(FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null) #endif { if (_freetype == null) { _freetype = new FreetypeNotoSansCJK(_modelFolderName); #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL yield return _freetype.Init(onDownloadProgressChanged); #else await _freetype.Init(onDownloadProgressChanged); #endif } } #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL private IEnumerator InitTextDetector(FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null) #else private async Task InitTextDetector(FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null) #endif { if (_textDetector == null) { FileDownloadManager manager = new FileDownloadManager(); manager.AddFile( "https://github.com/emgucv/models/raw/master/scene_text/DB_TD500_resnet50.onnx", _modelFolderName, "7B83A5E7AFBBD9D70313C902D188FF328656510DBF57D66A711E07DFDB81DF20"); manager.OnDownloadProgressChanged += onDownloadProgressChanged; #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL yield return manager.Download(); #else await manager.Download(); #endif if (manager.AllFilesDownloaded) { _textDetector = new TextDetectionModel_DB(manager.Files[0].LocalFile); _textDetector.BinaryThreshold = 0.3f; _textDetector.PolygonThreshold = 0.5f; _textDetector.MaxCandidates = 200; _textDetector.UnclipRatio = 2.0; _textDetector.SetInputScale(1.0 / 255.0); _textDetector.SetInputSize(new Size(736, 736)); _textDetector.SetInputMean(new MCvScalar(122.67891434, 116.66876762, 104.00698793)); /* if (Emgu.CV.Cuda.CudaInvoke.HasCuda) { _vehicleAttrRecognizer.SetPreferableBackend(Emgu.CV.Dnn.Backend.Cuda); _vehicleAttrRecognizer.SetPreferableTarget(Emgu.CV.Dnn.Target.Cuda); }*/ } } } #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL private IEnumerator InitTextRecognizer(FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null) #else private async Task InitTextRecognizer(FileDownloadManager.DownloadProgressChangedEventHandler onDownloadProgressChanged = null) #endif { if (_ocr == null) { FileDownloadManager manager = new FileDownloadManager(); manager.AddFile( "https://github.com/emgucv/models/raw/master/scene_text/crnn_cs_CN.onnx", _modelFolderName, "C760BF82D684B87DFABB288E6C0F92D41A8CD6C1780661CA2C3CD10C2065A9BA"); manager.AddFile( "https://github.com/emgucv/models/raw/master/scene_text/alphabet_3944.txt", _modelFolderName, "8027C9832D86764FECCD9BDD8974829C86994617E5787F178ED97DB2BDA1481A"); manager.OnDownloadProgressChanged += onDownloadProgressChanged; #if UNITY_EDITOR || UNITY_IOS || UNITY_ANDROID || UNITY_STANDALONE || UNITY_WEBGL yield return manager.Download(); #else await manager.Download(); #endif if (manager.AllFilesDownloaded) { _ocr = new TextRecognitionModel(manager.Files[0].LocalFile); _ocr.DecodeType = "CTC-greedy"; String[] vocab = File.ReadAllLines(manager.Files[1].LocalFile); _ocr.Vocabulary = vocab; _ocr.SetInputScale(1.0 / 127.5); _ocr.SetInputMean(new MCvScalar(127.5, 127.5, 127.5)); _ocr.SetInputSize(new Size(100, 32)); /* if (Emgu.CV.Cuda.CudaInvoke.HasCuda) { _vehicleAttrRecognizer.SetPreferableBackend(Emgu.CV.Dnn.Backend.Cuda); _vehicleAttrRecognizer.SetPreferableTarget(Emgu.CV.Dnn.Target.Cuda); }*/ } } } private static void FourPointsTransform(IInputArray frame, PointF[] vertices, Size outputSize, IOutputArray result) { PointF[] targetVertices = { new Point(0, outputSize.Height - 1), new Point(0, 0), new Point(outputSize.Width - 1, 0), new Point(outputSize.Width - 1, outputSize.Height - 1) }; Mat rotationMatrix = CvInvoke.GetPerspectiveTransform(vertices, targetVertices); CvInvoke.WarpPerspective(frame, result, rotationMatrix, outputSize); } /// /// Detect scene text from the given image /// /// The image /// The detected scene text. public DetectedObject[] Detect(IInputArray image) { using (VectorOfVectorOfPoint vvp = new VectorOfVectorOfPoint()) using (VectorOfFloat confidents = new VectorOfFloat()) { _textDetector.Detect(image, vvp, confidents); Point[][] detectionResults = vvp.ToArrayOfArray(); #if UNSAFE_ALLOWED var confidentResult = confidents.GetSpan(); #else float[] confidentResult = confidents.ToArray(); #endif List results = new List(); for (int i = 0; i < detectionResults.Length; i++) { DetectedObject st = new DetectedObject(); PointF[] detectedPointF = Array.ConvertAll(detectionResults[i], p => new PointF((float)p.X, (float)p.Y)); st.Region = CvInvoke.BoundingRectangle(detectionResults[i]); st.Confident = confidentResult[i]; using (Mat textSubMat = new Mat()) { FourPointsTransform(image, detectedPointF, new Size(100, 32), textSubMat); String text = _ocr.Recognize(textSubMat); st.Label = text; } results.Add(st); } return results.ToArray(); } } private MCvScalar _renderColor = new MCvScalar(0, 0, 255); /// /// Get or Set the color used in rendering. /// public MCvScalar RenderColor { get { return _renderColor; } set { _renderColor = value; } } /// /// Draw the vehicles to the image. /// /// The image to be drawn to. /// The scene texts. public void Render(IInputOutputArray image, DetectedObject[] sceneTexts) { foreach (var detected in sceneTexts) detected.Render(image, RenderColor, _freetype); /* foreach (SceneText st in sceneTexts) { CvInvoke.Polylines(image, st.Region, true, new MCvScalar(0, 0, 255)); }*/ } /// /// Process the input image and render into the output image /// /// The input image /// /// The output image, can be the same as , in which case we will render directly into the input image. /// Note that if no text is detected, will remain unchanged. /// If text is detected, we will draw the text and (rectangle) region on top of the existing pixels of . /// If the is not the same object as , it is a good idea to copy the pixels over from the input image before passing it to this function. /// /// The messages that we want to display. public String ProcessAndRender(IInputArray imageIn, IInputOutputArray imageOut) { Stopwatch watch = Stopwatch.StartNew(); var sceneTexts = Detect(imageIn); watch.Stop(); Render(imageOut, sceneTexts); return String.Format("Detected in {0} milliseconds.", watch.ElapsedMilliseconds); } /// /// Clear and reset the model. Required Init function to be called again before calling ProcessAndRender. /// public void Clear() { if (_textDetector != null) { _textDetector.Dispose(); _textDetector = null; } if (_ocr != null) { _ocr.Dispose(); _ocr = null; } if (_freetype != null) { _freetype.Dispose(); _freetype = null; } } /// /// Release the memory associated with this scene text detector. /// protected override void DisposeObject() { Clear(); } } }