Browse Source

Added Pix class. Rename Recognize to SetImage.

pull/19/head
Canming Huang 9 years ago
parent
commit
615dacf45e
  1. 494
      Emgu.CV.Example/OCR/OCRForm.cs
  2. 2
      Emgu.CV.Extern/tesseract/libtesseract/tesseract-ocr.git
  3. 32
      Emgu.CV.Extern/tesseract/tesseract.cpp
  4. 9
      Emgu.CV.Extern/tesseract/tesseract_c.h
  5. 147
      Emgu.CV.OCR/OcrInvoke.cs
  6. 34
      Emgu.CV.OCR/Pix.cs
  7. 41
      Emgu.CV.OCR/Tesseract.cs
  8. 130
      Emgu.CV/PInvoke/CvInvokeCore.cs

494
Emgu.CV.Example/OCR/OCRForm.cs

@ -18,271 +18,279 @@ using Emgu.CV.Util;
namespace OCR
{
public partial class OCRForm : Form
{
private Tesseract _ocr;
public OCRForm()
{
InitializeComponent();
InitOcr("", "eng", OcrEngineMode.TesseractLstmCombined);
ocrOptionsComboBox.SelectedIndex = 0;
Mat img = new Mat(200, 400, DepthType.Cv8U, 3); //Create a 3 channel image of 400x200
img.SetTo(new Bgr(255, 0, 0).MCvScalar); // set it to Blue color
//Draw "Hello, world." on the image using the specific font
CvInvoke.PutText(
img,
"Hello, world",
new System.Drawing.Point(10, 80),
FontFace.HersheyComplex,
1.0,
new Bgr(0, 255, 0).MCvScalar);
OcrImage(img);
}
private void InitOcr(String path, String lang, OcrEngineMode mode)
{
try
{
if (_ocr != null)
public partial class OCRForm : Form
{
private Tesseract _ocr;
/// <summary>
/// Create the OCR form: initialize the engine with the "eng" language in
/// Tesseract-only mode, select the first OCR option and run OCR on a
/// generated sample image.
/// </summary>
public OCRForm()
{
    InitializeComponent();
    InitOcr("", "eng", OcrEngineMode.TesseractOnly);
    ocrOptionsComboBox.SelectedIndex = 0;

    // The sample image is only the input of OcrImage (the form displays a separate
    // result Mat), so release its native memory as soon as OCR has finished.
    using (Mat img = new Mat(200, 400, DepthType.Cv8U, 3)) //Create a 3 channel image of 400x200
    {
        img.SetTo(new Bgr(255, 0, 0).MCvScalar); // set it to Blue color

        //Draw "Hello, world" on the image using the specific font
        CvInvoke.PutText(
            img,
            "Hello, world",
            new System.Drawing.Point(10, 80),
            FontFace.HersheyComplex,
            1.0,
            new Bgr(0, 255, 0).MCvScalar);

        OcrImage(img);
    }
}
private void InitOcr(String path, String lang, OcrEngineMode mode)
{
try
{
_ocr.Dispose();
_ocr = null;
if (_ocr != null)
{
_ocr.Dispose();
_ocr = null;
}
_ocr = new Tesseract(path, lang, mode);
languageNameLabel.Text = String.Format("{0} : {1}", lang, mode.ToString());
}
_ocr = new Tesseract(path, lang, mode);
languageNameLabel.Text = String.Format("{0} : {1}", lang, mode.ToString());
}
catch (Exception e)
{
_ocr = null;
MessageBox.Show(e.Message, "Failed to initialize tesseract OCR engine", MessageBoxButtons.OK);
languageNameLabel.Text = "Failed to initialize tesseract OCR engine";
}
}
/// <summary>
/// The OCR mode
/// </summary>
private enum OCRMode
{
/// <summary>
/// Perform a full page OCR
/// </summary>
FullPage,
/// <summary>
/// Detect the text region before applying OCR.
/// </summary>
TextDetection
}
private OCRMode Mode
{
get { return ocrOptionsComboBox.SelectedIndex == 0 ? OCRMode.FullPage : OCRMode.TextDetection; }
}
private static Rectangle ScaleRectangle(Rectangle r, double scale)
{
double centerX = r.Location.X + r.Width / 2.0;
double centerY = r.Location.Y + r.Height / 2.0;
double newWidth = Math.Round(r.Width * scale);
double newHeight = Math.Round(r.Height * scale);
return new Rectangle((int)Math.Round(centerX - newWidth / 2.0), (int)Math.Round(centerY - newHeight / 2.0),
(int)newWidth, (int)newHeight);
}
private static String OcrImage(Tesseract ocr, Mat image, OCRMode mode, Mat imageColor)
{
Bgr drawCharColor = new Bgr(Color.Blue);
if (image.NumberOfChannels == 1)
CvInvoke.CvtColor(image, imageColor, ColorConversion.Gray2Bgr);
else
image.CopyTo(imageColor);
if (mode == OCRMode.FullPage)
{
ocr.Recognize(image);
Tesseract.Character[] characters = ocr.GetCharacters();
if (characters.Length == 0)
catch (Exception e)
{
Mat imgGrey = new Mat();
CvInvoke.CvtColor(image, imgGrey, ColorConversion.Bgr2Gray);
Mat imgThresholded = new Mat();
CvInvoke.Threshold(imgGrey, imgThresholded, 65, 255, ThresholdType.Binary);
ocr.Recognize(imgThresholded);
characters = ocr.GetCharacters();
imageColor = imgThresholded;
if (characters.Length == 0)
{
CvInvoke.Threshold(image, imgThresholded, 190, 255, ThresholdType.Binary);
ocr.Recognize(imgThresholded);
characters = ocr.GetCharacters();
imageColor = imgThresholded;
}
_ocr = null;
MessageBox.Show(e.Message, "Failed to initialize tesseract OCR engine", MessageBoxButtons.OK);
languageNameLabel.Text = "Failed to initialize tesseract OCR engine";
}
foreach (Tesseract.Character c in characters)
}
/// <summary>
/// The OCR mode. The active value is derived from the selected index of the
/// OCR options combo box (see the Mode property).
/// </summary>
private enum OCRMode
{
/// <summary>
/// Perform a full page OCR: the whole image is handed to the OCR engine.
/// </summary>
FullPage,
/// <summary>
/// Detect the text regions first, then apply OCR to each detected region.
/// </summary>
TextDetection
}
/// <summary>
/// The OCR mode chosen in the options combo box: index 0 selects full page OCR,
/// any other index selects text detection.
/// </summary>
private OCRMode Mode
{
    get
    {
        if (ocrOptionsComboBox.SelectedIndex == 0)
        {
            return OCRMode.FullPage;
        }

        return OCRMode.TextDetection;
    }
}
/// <summary>
/// Scale a rectangle about its center.
/// </summary>
/// <param name="r">The rectangle to scale</param>
/// <param name="scale">The factor applied to both the width and the height</param>
/// <returns>
/// A rectangle whose rounded size is the original size multiplied by
/// <paramref name="scale"/>, positioned around the center of <paramref name="r"/>.
/// </returns>
private static Rectangle ScaleRectangle(Rectangle r, double scale)
{
    // Size of the scaled rectangle, rounded to whole pixels.
    double scaledWidth = Math.Round(r.Width * scale);
    double scaledHeight = Math.Round(r.Height * scale);

    // Center of the original rectangle; the scaled one is placed around it.
    double midX = r.Location.X + r.Width / 2.0;
    double midY = r.Location.Y + r.Height / 2.0;

    int left = (int)Math.Round(midX - scaledWidth / 2.0);
    int top = (int)Math.Round(midY - scaledHeight / 2.0);

    return new Rectangle(left, top, (int)scaledWidth, (int)scaledHeight);
}
private static String OcrImage(Tesseract ocr, Mat image, OCRMode mode, Mat imageColor)
{
Bgr drawCharColor = new Bgr(Color.Red);
if (image.NumberOfChannels == 1)
CvInvoke.CvtColor(image, imageColor, ColorConversion.Gray2Bgr);
else
image.CopyTo(imageColor);
if (mode == OCRMode.FullPage)
{
CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
}
return ocr.GetText();
}
else
{
bool checkInvert = true;
Rectangle[] regions;
using (
ERFilterNM1 er1 = new ERFilterNM1("trained_classifierNM1.xml", 8, 0.00025f, 0.13f, 0.4f, true, 0.1f))
using (ERFilterNM2 er2 = new ERFilterNM2("trained_classifierNM2.xml", 0.3f))
{
int channelCount = image.NumberOfChannels;
UMat[] channels = new UMat[checkInvert ? channelCount * 2 : channelCount];
for (int i = 0; i < channelCount; i++)
{
UMat c = new UMat();
CvInvoke.ExtractChannel(image, c, i);
channels[i] = c;
}
if (checkInvert)
{
for (int i = 0; i < channelCount; i++)
{
UMat c = new UMat();
CvInvoke.BitwiseNot(channels[i], c);
channels[i + channelCount] = c;
}
}
VectorOfERStat[] regionVecs = new VectorOfERStat[channels.Length];
for (int i = 0; i < regionVecs.Length; i++)
regionVecs[i] = new VectorOfERStat();
try
{
for (int i = 0; i < channels.Length; i++)
{
er1.Run(channels[i], regionVecs[i]);
er2.Run(channels[i], regionVecs[i]);
}
using (VectorOfUMat vm = new VectorOfUMat(channels))
{
regions = ERFilter.ERGrouping(image, vm, regionVecs, ERFilter.GroupingMethod.OrientationHoriz,
"trained_classifier_erGrouping.xml", 0.5f);
}
}
finally
{
foreach (UMat tmp in channels)
if (tmp != null)
tmp.Dispose();
foreach (VectorOfERStat tmp in regionVecs)
if (tmp != null)
tmp.Dispose();
}
Rectangle imageRegion = new Rectangle(Point.Empty, imageColor.Size);
for (int i = 0; i < regions.Length; i++)
{
Rectangle r = ScaleRectangle(regions[i], 1.1);
r.Intersect(imageRegion);
regions[i] = r;
}
ocr.SetImage(imageColor);
int recResult = ocr.Recognize();
Tesseract.Character[] characters = ocr.GetCharacters();
if (characters.Length == 0)
{
Mat imgGrey = new Mat();
CvInvoke.CvtColor(image, imgGrey, ColorConversion.Bgr2Gray);
Mat imgThresholded = new Mat();
CvInvoke.Threshold(imgGrey, imgThresholded, 65, 255, ThresholdType.Binary);
ocr.SetImage(imgThresholded);
characters = ocr.GetCharacters();
imageColor = imgThresholded;
if (characters.Length == 0)
{
CvInvoke.Threshold(image, imgThresholded, 190, 255, ThresholdType.Binary);
ocr.SetImage(imgThresholded);
characters = ocr.GetCharacters();
imageColor = imgThresholded;
}
}
foreach (Tesseract.Character c in characters)
{
CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
}
return ocr.GetText();
}
List<Tesseract.Character> allChars = new List<Tesseract.Character>();
String allText = String.Empty;
foreach (Rectangle rect in regions)
else
{
using (Mat region = new Mat(image, rect))
{
ocr.Recognize(region);
Tesseract.Character[] characters = ocr.GetCharacters();
//convert the coordinates from the local region to global
for (int i = 0; i < characters.Length; i++)
{
Rectangle charRegion = characters[i].Region;
charRegion.Offset(rect.Location);
characters[i].Region = charRegion;
bool checkInvert = true;
Rectangle[] regions;
using (
ERFilterNM1 er1 = new ERFilterNM1("trained_classifierNM1.xml", 8, 0.00025f, 0.13f, 0.4f, true, 0.1f))
using (ERFilterNM2 er2 = new ERFilterNM2("trained_classifierNM2.xml", 0.3f))
{
int channelCount = image.NumberOfChannels;
UMat[] channels = new UMat[checkInvert ? channelCount * 2 : channelCount];
for (int i = 0; i < channelCount; i++)
{
UMat c = new UMat();
CvInvoke.ExtractChannel(image, c, i);
channels[i] = c;
}
if (checkInvert)
{
for (int i = 0; i < channelCount; i++)
{
UMat c = new UMat();
CvInvoke.BitwiseNot(channels[i], c);
channels[i + channelCount] = c;
}
}
VectorOfERStat[] regionVecs = new VectorOfERStat[channels.Length];
for (int i = 0; i < regionVecs.Length; i++)
regionVecs[i] = new VectorOfERStat();
try
{
for (int i = 0; i < channels.Length; i++)
{
er1.Run(channels[i], regionVecs[i]);
er2.Run(channels[i], regionVecs[i]);
}
using (VectorOfUMat vm = new VectorOfUMat(channels))
{
regions = ERFilter.ERGrouping(image, vm, regionVecs, ERFilter.GroupingMethod.OrientationHoriz,
"trained_classifier_erGrouping.xml", 0.5f);
}
}
finally
{
foreach (UMat tmp in channels)
if (tmp != null)
tmp.Dispose();
foreach (VectorOfERStat tmp in regionVecs)
if (tmp != null)
tmp.Dispose();
}
Rectangle imageRegion = new Rectangle(Point.Empty, imageColor.Size);
for (int i = 0; i < regions.Length; i++)
{
Rectangle r = ScaleRectangle(regions[i], 1.1);
r.Intersect(imageRegion);
regions[i] = r;
}
}
List<Tesseract.Character> allChars = new List<Tesseract.Character>();
String allText = String.Empty;
foreach (Rectangle rect in regions)
{
using (Mat region = new Mat(image, rect))
{
ocr.SetImage(region);
Tesseract.Character[] characters = ocr.GetCharacters();
//convert the coordinates from the local region to global
for (int i = 0; i < characters.Length; i++)
{
Rectangle charRegion = characters[i].Region;
charRegion.Offset(rect.Location);
characters[i].Region = charRegion;
}
allChars.AddRange(characters);
allText += ocr.GetText() + Environment.NewLine;
}
}
Bgr drawRegionColor = new Bgr(Color.Red);
foreach (Rectangle rect in regions)
{
CvInvoke.Rectangle(imageColor, rect, drawRegionColor.MCvScalar);
}
foreach (Tesseract.Character c in allChars)
{
CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
}
return allText;
}
allChars.AddRange(characters);
allText += ocr.GetText() + Environment.NewLine;
}
}
Bgr drawRegionColor = new Bgr(Color.Red);
foreach (Rectangle rect in regions)
{
CvInvoke.Rectangle(imageColor, rect, drawRegionColor.MCvScalar);
}
foreach (Tesseract.Character c in allChars)
{
CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
}
return allText;
}
}
}
private void OcrImage(Mat source)
{
imageBox1.Image = null;
ocrTextBox.Text = String.Empty;
hocrTextBox.Text = String.Empty;
private void OcrImage(Mat source)
{
imageBox1.Image = null;
ocrTextBox.Text = String.Empty;
hocrTextBox.Text = String.Empty;
#if !DEBUG
try
{
Mat result = new Mat();
String ocredText = OcrImage(_ocr, source, Mode, result);
imageBox1.Image = result;
ocrTextBox.Text = ocredText;
if (Mode == OCRMode.FullPage)
#endif
{
hocrTextBox.Text = _ocr.GetHOCRText();
}
}
Mat result = new Mat();
String ocredText = OcrImage(_ocr, source, Mode, result);
imageBox1.Image = result;
ocrTextBox.Text = ocredText;
if (Mode == OCRMode.FullPage)
{
hocrTextBox.Text = _ocr.GetHOCRText();
}
}
#if !DEBUG
catch (Exception exception)
{
MessageBox.Show(exception.Message);
}
}
#endif
}
private void loadImageButton_Click(object sender, EventArgs e)
{
if (openImageFileDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK)
{
fileNameTextBox.Text = openImageFileDialog.FileName;
Mat source = new Mat(fileNameTextBox.Text);
OcrImage(source);
}
}
/// <summary>
/// Let the user pick an image file, show the chosen path in the file name
/// text box and run OCR on the loaded image.
/// </summary>
/// <param name="sender">The control that raised the event</param>
/// <param name="e">The event arguments</param>
private void loadImageButton_Click(object sender, EventArgs e)
{
    if (openImageFileDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK)
    {
        fileNameTextBox.Text = openImageFileDialog.FileName;

        // The loaded image is only the input of OcrImage (the form displays a
        // separate result Mat), so dispose it once OCR has finished.
        using (Mat source = new Mat(fileNameTextBox.Text))
        {
            OcrImage(source);
        }
    }
}
private void loadLanguageToolStripMenuItem_Click(object sender, EventArgs e)
{
if (openLanguageFileDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK)
{
string path = Path.GetDirectoryName(openLanguageFileDialog.FileName);
string lang = Path.GetFileNameWithoutExtension(openLanguageFileDialog.FileName).Split('.')[0];
private void loadLanguageToolStripMenuItem_Click(object sender, EventArgs e)
{
if (openLanguageFileDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK)
{
string path = Path.GetDirectoryName(openLanguageFileDialog.FileName);
string lang = Path.GetFileNameWithoutExtension(openLanguageFileDialog.FileName).Split('.')[0];
InitOcr(path, lang, OcrEngineMode.Default);
InitOcr(path, lang, OcrEngineMode.Default);
}
}
}
}
}
}
}

2
Emgu.CV.Extern/tesseract/libtesseract/tesseract-ocr.git

@ -1 +1 @@
Subproject commit ea68f9fa0f6435560c5d4a61b2333820d0c8c4b8
Subproject commit a80cfa7630e14374a163441a38d4124c8a1102ea

32
Emgu.CV.Extern/tesseract/tesseract.cpp

@ -31,20 +31,20 @@ void TessBaseAPIRelease(EmguTesseract** ocr)
delete *ocr;
}
/*
void TessBaseAPIRecognizeImage(EmguTesseract* ocr, IplImage* image)
int TessBaseAPIRecognize(EmguTesseract* ocr)
{
ocr->SetImage( (const unsigned char*)image->imageData, image->width, image->height, image->nChannels, image->widthStep);
if (ocr->Recognize(NULL) != 0)
CV_Error(CV_StsError, "Tesseract engine: Recognize Failed");
}*/
return ocr->Recognize(NULL);
}
void TessBaseAPIRecognizeArray(EmguTesseract* ocr, cv::_InputArray* mat)
void TessBaseAPISetImage(EmguTesseract* ocr, cv::_InputArray* mat)
{
cv::Mat m = mat->getMat();
ocr->SetImage((const unsigned char*) m.data, m.size().width, m.size().height, m.channels(), m.step);
if (ocr->Recognize(NULL) != 0)
CV_Error(CV_StsError, "Tesseract engine: Recognize Failed");
ocr->SetImage(static_cast<const unsigned char*>(m.data), m.size().width, m.size().height, m.elemSize(), m.step);
}
/* Hand a Leptonica Pix to the OCR engine as the image to work on.
 * Thin forwarder to EmguTesseract::SetImage(Pix*). */
void TessBaseAPISetImagePix(EmguTesseract* ocr, Pix* pix)
{
    ocr->SetImage(pix);
}
void TessBaseAPIGetUTF8Text(EmguTesseract* ocr, std::vector<unsigned char>* vectorOfByte)
@ -118,7 +118,6 @@ bool TessBaseAPISetVariable(EmguTesseract* ocr, const char* varName, const char*
return ocr->SetVariable(varName, value);
}
void TessBaseAPISetPageSegMode(EmguTesseract* ocr, tesseract::PageSegMode mode)
{
ocr->SetPageSegMode(mode);
@ -163,6 +162,11 @@ int TessBaseAPIIsValidWord(EmguTesseract* ocr, char* word)
return ocr->IsValidWord(word);
}
/* Return the OCR engine mode reported by the engine's oem() accessor, as an int. */
int TessBaseAPIGetOem(EmguTesseract* ocr)
{
    const int engineMode = ocr->oem();
    return engineMode;
}
Pix* leptCreatePixFromMat(cv::Mat* m)
{
const unsigned char* imagedata = m->data;
@ -223,4 +227,10 @@ Pix* leptCreatePixFromMat(cv::Mat* m)
}
pixSetYRes(pix, 300);
return pix;
}
/* Destroy the Pix referenced by the given handle and reset the caller's
 * pointer to 0. A NULL handle is ignored. */
void leptPixDestroy(Pix** pix)
{
    // Without this guard the assignment below would dereference a NULL handle.
    if (!pix)
        return;

    pixDestroy(pix);
    *pix = 0;
}

9
Emgu.CV.Extern/tesseract/tesseract_c.h

@ -62,9 +62,10 @@ CVAPI(int) TessBaseAPIInit(EmguTesseract* ocr, const char* dataPath, const char*
CVAPI(void) TessBaseAPIRelease(EmguTesseract** ocr);
//CVAPI(void) TessBaseAPIRecognizeImage(EmguTesseract* ocr, IplImage* image);
CVAPI(int) TessBaseAPIRecognize(EmguTesseract* ocr);
CVAPI(void) TessBaseAPIRecognizeArray(EmguTesseract* ocr, cv::_InputArray* mat);
CVAPI(void) TessBaseAPISetImage(EmguTesseract* ocr, cv::_InputArray* mat);
CVAPI(void) TessBaseAPISetImagePix(EmguTesseract* ocr, Pix* pix);
CVAPI(void) TessBaseAPIGetUTF8Text(EmguTesseract* ocr, std::vector<unsigned char>* vectorOfByte);
@ -93,5 +94,9 @@ CVAPI(bool) TessPageIteratorGetBaseLine(
CVAPI(int) TessBaseAPIIsValidWord(EmguTesseract* ocr, char* word);
CVAPI(int) TessBaseAPIGetOem(EmguTesseract* ocr);
CVAPI(Pix*) leptCreatePixFromMat(cv::Mat* m);
CVAPI(void) leptPixDestroy(Pix** pix);
#endif

147
Emgu.CV.OCR/OcrInvoke.cs

@ -15,95 +15,108 @@ using System.Diagnostics;
namespace Emgu.CV.OCR
{
/// <summary>
/// Library to invoke Tesseract OCR functions
/// </summary>
public static class OcrInvoke
{
static OcrInvoke()
{
//dummy code that is used to involve the static constructor of CvInvoke, if it has not already been called.
CvInvoke.CheckLibraryLoaded();
}
#region Tesseract
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern IntPtr TessBaseAPICreate();
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern int TessBaseAPIInit(
IntPtr ocr,
[MarshalAs(CvInvoke.StringMarshalType)]
/// <summary>
/// Library to invoke Tesseract OCR functions
/// </summary>
public static class OcrInvoke
{
/// <summary>
/// Static constructor: makes sure CvInvoke has been initialized before any
/// of the OCR P/Invoke entry points in this class is used.
/// </summary>
static OcrInvoke()
{
//dummy code that is used to invoke the static constructor of CvInvoke, if it has not already been called.
CvInvoke.CheckLibraryLoaded();
}
#region Tesseract
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern IntPtr TessBaseAPICreate();
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern int TessBaseAPIInit(
IntPtr ocr,
[MarshalAs(CvInvoke.StringMarshalType)]
String dataPath,
[MarshalAs(CvInvoke.StringMarshalType)]
[MarshalAs(CvInvoke.StringMarshalType)]
String language,
OcrEngineMode mode);
OcrEngineMode mode);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIRelease(ref IntPtr ocr);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIRelease(ref IntPtr ocr);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIRecognizeArray(IntPtr ocr, IntPtr image);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPISetImage(IntPtr ocr, IntPtr image);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIGetUTF8Text(
IntPtr ocr,
IntPtr text);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPISetImagePix(IntPtr ocr, IntPtr pix);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIGetHOCRText(IntPtr ocr, int pageNumber, IntPtr vectorOfByte);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIGetUTF8Text(
IntPtr ocr,
IntPtr text);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIExtractResult(IntPtr ocr, IntPtr charSeq, IntPtr resultSeq);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIGetHOCRText(IntPtr ocr, int pageNumber, IntPtr vectorOfByte);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
[return: MarshalAs(CvInvoke.BoolMarshalType)]
internal static extern bool TessBaseAPISetVariable(
IntPtr ocr,
[MarshalAs(CvInvoke.StringMarshalType)]
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPIExtractResult(IntPtr ocr, IntPtr charSeq, IntPtr resultSeq);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
[return: MarshalAs(CvInvoke.BoolMarshalType)]
internal static extern bool TessBaseAPISetVariable(
IntPtr ocr,
[MarshalAs(CvInvoke.StringMarshalType)]
String varName,
[MarshalAs(CvInvoke.StringMarshalType)]
[MarshalAs(CvInvoke.StringMarshalType)]
String value);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern IntPtr TesseractGetVersion();
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern IntPtr TesseractGetVersion();
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPISetPageSegMode(IntPtr ocr, PageSegMode mode);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessBaseAPISetPageSegMode(IntPtr ocr, PageSegMode mode);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern PageSegMode TessBaseAPIGetPageSegMode(IntPtr ocr);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern PageSegMode TessBaseAPIGetPageSegMode(IntPtr ocr);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern int TessBaseAPIGetOpenCLDevice(IntPtr ocr, ref IntPtr device);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern int TessBaseAPIGetOpenCLDevice(IntPtr ocr, ref IntPtr device);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern IntPtr TessBaseAPIAnalyseLayout(
IntPtr ocr,
[MarshalAs(CvInvoke.BoolMarshalType)]
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern IntPtr TessBaseAPIAnalyseLayout(
IntPtr ocr,
[MarshalAs(CvInvoke.BoolMarshalType)]
bool mergeSimilarWords);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern int TessBaseAPIIsValidWord(
IntPtr ocr,
[MarshalAs(CvInvoke.StringMarshalType)]
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern int TessBaseAPIIsValidWord(
IntPtr ocr,
[MarshalAs(CvInvoke.StringMarshalType)]
String word);
#endregion
#endregion
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessPageIteratorRelease(ref IntPtr iterator);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessPageIteratorGetOrientation(IntPtr iterator, ref PageOrientation orientation, ref WritingDirection writingDirection, ref TextlineOrder order, ref float deskewAngle);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessPageIteratorRelease(ref IntPtr iterator);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
[return: MarshalAs(CvInvoke.BoolMarshalType)]
internal static extern bool TessPageIteratorGetBaseLine(
IntPtr iterator,
PageIteratorLevel level,
ref int x1, ref int y1, ref int x2, ref int y2);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void TessPageIteratorGetOrientation(IntPtr iterator, ref PageOrientation orientation, ref WritingDirection writingDirection, ref TextlineOrder order, ref float deskewAngle);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern OcrEngineMode TessBaseAPIGetOem(IntPtr ocr);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
[return: MarshalAs(CvInvoke.BoolMarshalType)]
internal static extern bool TessPageIteratorGetBaseLine(
IntPtr iterator,
PageIteratorLevel level,
ref int x1, ref int y1, ref int x2, ref int y2);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern int TessBaseAPIRecognize(IntPtr ocr);
}
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern IntPtr leptCreatePixFromMat(IntPtr m);
[DllImport(CvInvoke.ExternLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
internal static extern void leptPixDestroy(ref IntPtr pix);
}
}

34
Emgu.CV.OCR/Pix.cs

@ -0,0 +1,34 @@
//----------------------------------------------------------------------------
// Copyright (C) 2004-2017 by EMGU Corporation. All rights reserved.
//----------------------------------------------------------------------------
using Emgu.CV.Util;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Runtime.InteropServices;
using System.Text;
using System.IO;
using Emgu.CV;
using Emgu.CV.Structure;
using Emgu.Util;
using System.Diagnostics;
namespace Emgu.CV.OCR
{
/// <summary>
/// Managed wrapper around a native Pix image, which can be passed to
/// Tesseract.SetImage. The native object is created from a Mat and
/// destroyed when this object is disposed.
/// </summary>
public class Pix : UnmanagedObject
{
    /// <summary>
    /// Create a Pix image from the given Mat.
    /// </summary>
    /// <param name="mat">The source image, must not be null</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="mat"/> is null</exception>
    public Pix(Mat mat)
    {
        // Fail in managed code instead of handing a null pointer to the native
        // conversion routine, which reads from the Mat it is given.
        if (mat == null)
        {
            throw new ArgumentNullException("mat");
        }

        _ptr = OcrInvoke.leptCreatePixFromMat(mat);
    }

    /// <summary>
    /// Release the native Pix associated with this object, if any.
    /// </summary>
    protected override void DisposeObject()
    {
        if (_ptr != IntPtr.Zero)
        {
            OcrInvoke.leptPixDestroy(ref _ptr);
        }
    }
}
}

41
Emgu.CV.OCR/Tesseract.cs

@ -260,10 +260,29 @@ namespace Emgu.CV.OCR
/// Set the image for optical character recognition
/// </summary>
/// <param name="image">The image where detection took place</param>
public void Recognize(IInputArray image)
public void SetImage(IInputArray image)
{
using (InputArray iaImage = image.GetInputArray())
OcrInvoke.TessBaseAPIRecognizeArray(_ptr, iaImage);
OcrInvoke.TessBaseAPISetImage(_ptr, iaImage);
}
/// <summary>
/// Set the image for optical character recognition
/// </summary>
/// <param name="image">The Pix image to run OCR on, must not be null</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="image"/> is null</exception>
public void SetImage(Pix image)
{
    // Reject null here rather than passing a null Pix pointer to the native engine.
    if (image == null)
    {
        throw new ArgumentNullException("image");
    }

    OcrInvoke.TessBaseAPISetImagePix(_ptr, image);
}
/// <summary>
/// Run recognition on the image previously supplied through SetImage,
/// generating Tesseract internal structures.
/// </summary>
/// <returns>The status code reported by the native engine; 0 on success.</returns>
public int Recognize()
{
    int status = OcrInvoke.TessBaseAPIRecognize(_ptr);
    return status;
}
/// <summary>
@ -309,8 +328,8 @@ namespace Emgu.CV.OCR
private String UtfByteVectorToString(VectorOfByte bytes)
{
#if NETFX_CORE
byte[] bArr = bytes.ToArray();
return _utf8.GetString(bArr, 0, bArr.Length).Replace("\n", Environment.NewLine);
byte[] bArr = bytes.ToArray();
return _utf8.GetString(bArr, 0, bArr.Length).Replace("\n", Environment.NewLine);
#else
return _utf8.GetString(bytes.ToArray()).Replace("\n", Environment.NewLine);
#endif
@ -325,8 +344,6 @@ namespace Emgu.CV.OCR
using (VectorOfByte textSeq = new VectorOfByte())
using (VectorOfTesseractResult results = new VectorOfTesseractResult())
{
//Seq<byte> textSeq = new Seq<byte>(stor);
//Seq<TesseractResult> results = new Seq<TesseractResult>(stor);
OcrInvoke.TessBaseAPIExtractResult(_ptr, textSeq, results);
byte[] bytes = textSeq.ToArray();
@ -378,10 +395,18 @@ namespace Emgu.CV.OCR
{
return new PageIterator(OcrInvoke.TessBaseAPIAnalyseLayout(_ptr, mergeSimilarWords));
}
/// <summary>
/// The OCR engine mode, as reported by the native engine.
/// </summary>
public OcrEngineMode Oem
{
    get
    {
        OcrEngineMode engineMode = OcrInvoke.TessBaseAPIGetOem(_ptr);
        return engineMode;
    }
}
}
/// <summary>
/// When Tesseract/Cube is initialized we can choose to instantiate/load/run
/// When Tesseract/LSTM is initialized we can choose to instantiate/load/run
/// only the Tesseract part, only the LSTM part or both along with the combiner.
/// The preference of which engine to use is stored in tessedit_ocr_engine_mode.
/// </summary>
@ -479,6 +504,6 @@ namespace Emgu.CV.OCR
/// Number of enum entries.
/// </summary>
Count
};
}
}

130
Emgu.CV/PInvoke/CvInvokeCore.cs

@ -1519,136 +1519,6 @@ namespace Emgu.CV
#endregion
/*
#region Memory Storages
/// <summary>
/// Creates a memory storage and returns pointer to it. Initially the storage is empty. All fields of the header, except the block_size, are set to 0.
/// </summary>
/// <param name="blockSize"></param>
/// <returns>Size of the storage blocks in bytes. If it is 0, the block size is set to default value - currently it is 64K. </returns>
[DllImport(OpencvCoreLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
public static extern IntPtr cvCreateMemStorage(int blockSize);
/// <summary>
/// Creates a child memory storage that is similar to simple memory storage except for the differences in the memory allocation/deallocation mechanism. When a child storage needs a new block to add to the block list, it tries to get this block from the parent. The first unoccupied parent block available is taken and excluded from the parent block list. If no blocks are available, the parent either allocates a block or borrows one from its own parent, if any. In other words, the chain, or a more complex structure, of memory storages where every storage is a child/parent of another is possible. When a child storage is released or even cleared, it returns all blocks to the parent. In other aspects, the child storage is the same as the simple storage.
/// </summary>
/// <param name="parent">Parent memory storage</param>
/// <returns>ChildMemStorage</returns>
[DllImport(OpencvCoreLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
public static extern IntPtr cvCreateChildMemStorage(IntPtr parent);
/// <summary>
/// Resets the top (free space boundary) of the storage to the very beginning. This function does not deallocate any memory. If the storage has a parent, the function returns all blocks to the parent.
/// </summary>
/// <param name="storage">Memory storage</param>
[DllImport(OpencvCoreLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
public static extern void cvClearMemStorage(IntPtr storage);
/// <summary>
/// Deallocates all storage memory blocks or returns them to the parent, if any. Then it deallocates the storage header and clears the pointer to the storage. All children of the storage must be released before the parent is released.
/// </summary>
/// <param name="storage">Pointer to the released storage</param>
[DllImport(OpencvCoreLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
public static extern void cvReleaseMemStorage(ref IntPtr storage);
#endregion
/// <summary>
/// Loads object from file. It provides a simple interface to cvRead. After object is loaded, the file storage is closed and all the temporary buffers are deleted. Thus, to load a dynamic structure, such as sequence, contour or graph, one should pass a valid destination memory storage to the function.
/// </summary>
/// <param name="fileName">File name</param>
/// <param name="memstorage">Memory storage for dynamic structures, such as CvSeq or CvGraph. It is not used for matrices or images</param>
/// <param name="name">Optional object name. If it is IntPtr.Zero, the first top-level object in the storage will be loaded</param>
/// <param name="realName">Optional output parameter that will contain name of the loaded object (useful if name=IntPtr.Zero). </param>
/// <returns>Loaded object from file</returns>
[DllImport(OpencvCoreLibrary, CallingConvention = CvInvoke.CvCallingConvention, EntryPoint = "cvLoad")]
private static extern IntPtr _cvLoad(
[MarshalAs(StringMarshalType)] String fileName,
IntPtr memstorage,
[MarshalAs(StringMarshalType)] String name,
IntPtr realName);
/// <summary>
/// Loads object from file. It provides a simple interface to cvRead. After object is loaded, the file storage is closed and all the temporary buffers are deleted. Thus, to load a dynamic structure, such as sequence, contour or graph, one should pass a valid destination memory storage to the function.
/// </summary>
/// <param name="fileName">File name</param>
/// <param name="memstorage">Memory storage for dynamic structures, such as CvSeq or CvGraph. It is not used for matrices or images</param>
/// <param name="name">Optional object name. If it is NULL, the first top-level object in the storage will be loaded</param>
/// <param name="realName">Optional output parameter that will contain name of the loaded object (useful if name=NULL). </param>
/// <returns>Loaded object from file</returns>
public static IntPtr cvLoad(string fileName, IntPtr memstorage, string name, IntPtr realName)
{
try
{
return _cvLoad(fileName, memstorage, name, realName);
}
catch (CvException)
{
//cv.dll needed to be load before creating HaarCascade object
//creating the following dummy will do the job
//a bug(?) in OpenCV
//see http://opencvlibrary.sourceforge.net/FaceDetection
//after step 11 there is an explanation
using (Image<Gray, Byte> dummy = new Image<Gray, Byte>(1, 1))
{
dummy._Erode(1);
}
//After OpenCV structure reorganization, opencv_objdetect is needed in memory
//This can be done by loading the HOG detector
using (HOGDescriptor desc = new HOGDescriptor())
{
}
return _cvLoad(fileName, memstorage, name, realName);
}
}
/// <summary>
/// Creates a sequence that represents the specified slice of the input sequence. The new sequence either shares the elements with the original sequence or has own copy of the elements. So if one needs to process a part of sequence but the processing function does not have a slice parameter, the required sub-sequence may be extracted using this function.
/// </summary>
/// <param name="seq">Sequence</param>
/// <param name="slice">The part of the sequence to extract</param>
/// <param name="storage">The destination storage to keep the new sequence header and the copied data if any. If it is IntPtr.Zero, the function uses the storage containing the input sequence.</param>
/// <param name="copyData">The flag that indicates whether to copy the elements of the extracted slice (copy_data!=0) or not (copy_data=0)</param>
/// <returns>A pointer to CvSeq</returns>
#if ANDROID
public static IntPtr cvSeqSlice(
IntPtr seq,
MCvSlice slice,
IntPtr storage,
int copyData)
{
return cvSeqSlice(seq, slice.start_index, slice.end_index, storage, copyData);
}
[DllImport(OpencvCoreLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
private static extern IntPtr cvSeqSlice(
IntPtr seq,
int startIndex, int endIndex,
IntPtr storage,
int copyData);
#else
[DllImport(OpencvCoreLibrary, CallingConvention = CvInvoke.CvCallingConvention)]
public static extern IntPtr cvSeqSlice(
IntPtr seq,
MCvSlice slice,
IntPtr storage,
int copyData);
#endif
/// <summary>
/// Creates a sequence that represents the specified slice of the input sequence. The new sequence either shares the elements with the original sequence or has own copy of the elements. So if one needs to process a part of sequence but the processing function does not have a slice parameter, the required sub-sequence may be extracted using this function.
/// </summary>
/// <param name="seq">Sequence</param>
/// <param name="slice">The part of the sequence to extract</param>
/// <param name="storage">The destination storage to keep the new sequence header and the copied data if any. If it is IntPtr.Zero, the function uses the storage containing the input sequence.</param>
/// <param name="copyData">The flag that indicates whether to copy the elements of the extracted slice (copyData == true) or not (copyData=false)</param>
/// <returns>A pointer to CvSeq</returns>
public static IntPtr cvSeqSlice(IntPtr seq, MCvSlice slice, IntPtr storage, bool copyData)
{
return cvSeqSlice(seq, slice, storage, copyData ? 1 : 0);
}*/
/// <summary>
/// Assign the new value to the particular element of single-channel array
/// </summary>

Loading…
Cancel
Save