The solution must be universal (working with different fonts and colors) and stable.
The input is a point with X, Y coordinates, and the output is a rectangle (or a more complex shape) that contains the word or phrase under that point.
Currently I run Tesseract recognition on the entire image with the hocr option, then extract all word rectangles from the output HTML, and finally find the rectangle nearest to the point. The code is shown below. However, this is inefficient because the entire image has to be recognized.
Of course, it is possible to recognize only part of the image instead of all of it, but that is not a clean solution either: font sizes vary, so the right crop size is unknown, and irrelevant words still get recognized.
public class WordRecognizerTesseractHocr
// Base name (without extension) of the temporary files: ".png" is appended for the image
// that is saved to disk and ".html" for the hOCR result that is read back.
const string HelperFileName = "temp";
/// <summary>
/// Saves the current bitmap as a PNG, runs the external <c>tesseract.exe</c> on it in hOCR
/// mode, waits for it to finish, and returns the recognized word nearest to <c>Position</c>.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Bitmap</c> and <c>Position</c> are members declared outside the visible
/// part of this class; they are used here exactly as the original code used them.
/// </remarks>
/// <returns>The nearest recognized word, or an empty string when nothing was recognized.</returns>
public string NextVariant()
{
    string imagePath = HelperFileName + ".png";
    Bitmap.Save(imagePath, ImageFormat.Png);

    // Use the same base name for the output so that the file read below is the one tesseract writes.
    var startInfo = new ProcessStartInfo("tesseract.exe", imagePath + " " + HelperFileName + " hocr");
    startInfo.WindowStyle = ProcessWindowStyle.Hidden;

    // Process.Start returns immediately; without waiting, the HTML file may be missing,
    // stale, or only partially written when it is read.
    using (var process = Process.Start(startInfo))
    {
        if (process != null)
        {
            process.WaitForExit();
        }
    }

    return GetNearestWord(File.ReadAllText(HelperFileName + ".html"), Position);
}
/// <summary>
/// Parses tesseract hOCR output, rebuilds <c>RectsWords</c> from every <c>ocr_word</c> span,
/// and returns the text of the word whose bounding box is closest to <paramref name="position"/>.
/// </summary>
/// <param name="tesseractHtml">hOCR markup; it must be well-formed XML.</param>
/// <param name="position">Point to search around, in image pixel coordinates.</param>
/// <returns>The nearest word's text, or an empty string when no word spans were found.</returns>
public string GetNearestWord(string tesseractHtml, Point position)
{
    var xml = XDocument.Parse(tesseractHtml);
    RectsWords = new Dictionary<Rectangle, string>();

    // Match on the local name so spans are found whether or not the document declares the
    // XHTML namespace, and cast the attribute instead of reading .Value so that spans
    // without a class attribute are skipped rather than throwing NullReferenceException.
    var ocr_words = xml.Descendants()
        .Where(element => element.Name.LocalName == "span"
                          && (string)element.Attribute("class") == "ocr_word");

    foreach (var ocr_word in ocr_words)
    {
        var title = (string)ocr_word.Attribute("title");
        if (title == null)
        {
            continue;
        }

        // Expected layout: "bbox left top right bottom", optionally followed by
        // "; x_wconf NN". Splitting on ';' as well keeps the fifth token a clean number.
        var strs = title.Split(new[] { ' ', ';' }, StringSplitOptions.RemoveEmptyEntries);
        if (strs.Length < 5)
        {
            continue;
        }

        var culture = System.Globalization.CultureInfo.InvariantCulture;
        int left = int.Parse(strs[1], culture);
        int top = int.Parse(strs[2], culture);
        int width = int.Parse(strs[3], culture) - left + 1;   // right edge is inclusive
        int height = int.Parse(strs[4], culture) - top + 1;   // bottom edge is inclusive

        // Indexer instead of Add: two words reported with the same box must not throw.
        RectsWords[new Rectangle(left, top, width, height)] = ocr_word.Value;
    }

    // Single pass for the minimum; the strict '<' keeps the first of equally distant words,
    // which is the same word a stable sort followed by First() would pick.
    bool found = false;
    double nearestDistance = 0;
    string nearestWord = string.Empty;
    foreach (var rectWord in RectsWords)
    {
        double distance = Distance(position, rectWord.Key);
        if (!found || distance < nearestDistance)
        {
            found = true;
            nearestDistance = distance;
            nearestWord = rectWord.Value;
        }
    }

    return nearestWord;
}
/// <summary>
/// Distance from a point to a rectangle, treating the rectangle's right and bottom edges
/// as the inclusive pixels <c>Left + Width - 1</c> and <c>Top + Height - 1</c>.
/// </summary>
/// <param name="pos">The point to measure from.</param>
/// <param name="rect">The rectangle to measure to.</param>
/// <returns>
/// 0 when the point lies inside the rectangle; the axis-aligned gap when the point is
/// directly left/right of or above/below it; otherwise the Euclidean distance to the
/// nearest corner pixel.
/// </returns>
public static double Distance(Point pos, Rectangle rect)
{
    int rightExclusive = rect.Left + rect.Width;
    int bottomExclusive = rect.Top + rect.Height;

    bool leftOf = pos.X < rect.Left;
    bool above = pos.Y < rect.Top;
    bool withinColumns = !leftOf && pos.X < rightExclusive;
    bool withinRows = !above && pos.Y < bottomExclusive;

    // Gap along each axis to the nearest edge pixel (0 when inside the span on that axis).
    int gapX;
    if (leftOf)
    {
        gapX = rect.Left - pos.X;
    }
    else if (withinColumns)
    {
        gapX = 0;
    }
    else
    {
        gapX = pos.X - (rightExclusive - 1);
    }

    int gapY;
    if (above)
    {
        gapY = rect.Top - pos.Y;
    }
    else if (withinRows)
    {
        gapY = 0;
    }
    else
    {
        gapY = pos.Y - (bottomExclusive - 1);
    }

    // Directly above/below or inside: only the vertical gap counts.
    if (withinColumns)
    {
        return gapY;
    }

    // Directly left/right: only the horizontal gap counts.
    if (withinRows)
    {
        return gapX;
    }

    // Diagonal to a corner.
    return Math.Sqrt(gapX * gapX + gapY * gapY);
}
/// <summary>
/// Word bounding boxes mapped to their recognized text. GetNearestWord replaces this
/// dictionary with a fresh one on every call and fills it from the parsed hOCR output.
/// </summary>
public IDictionary<Rectangle, string> RectsWords
protected set;