I am trying to use http://ift.tt/1hNCkae library to read text from shopping receipt.
MyViewController.h
#import <TesseractOCR/TesseractOCR.h>
@interface MyViewController : UIViewController <G8TesseractDelegate>
@end
MyViewController.m
@implementation MyViewController
- (void)viewDidLoad
{
[super viewDidLoad];
// Languages are used for recognition (e.g. eng, ita, etc.). Tesseract engine
// will search for the .traineddata language file in the tessdata directory.
// For example, specifying "eng+ita" will search for "eng.traineddata" and
// "ita.traineddata". Cube engine will search for "eng.cube.*" files.
// See http://ift.tt/1go8FEN.
// Create your G8Tesseract object using the initWithLanguage method:
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:@"eng"];
// Optionaly: You could specify engine to recognize with.
// G8OCREngineModeTesseractOnly by default. It provides more features and faster
// than Cube engine. See G8Constants.h for more information.
//tesseract.engineMode = G8OCREngineModeTesseractOnly;
// Set up the delegate to receive Tesseract's callbacks.
// self should respond to TesseractDelegate and implement a
// "- (BOOL)shouldCancelImageRecognitionForTesseract:(G8Tesseract *)tesseract"
// method to receive a callback to decide whether or not to interrupt
// Tesseract before it finishes a recognition.
tesseract.delegate = self;
// Optional: Limit the character set Tesseract should try to recognize from
tesseract.charWhitelist = @"0123456789";
// This is wrapper for common Tesseract variable kG8ParamTesseditCharWhitelist:
// [tesseract setVariableValue:@"0123456789" forKey:kG8ParamTesseditCharBlacklist];
// See G8TesseractParameters.h for a complete list of Tesseract variables
// Optional: Limit the character set Tesseract should not try to recognize from
//tesseract.charBlacklist = @"OoZzBbSs";
// Specify the image Tesseract should recognize on
tesseract.image = [[UIImage imageNamed:@"walmart_receipt.png"] g8_blackAndWhite];
// Optional: Limit the area of the image Tesseract should recognize on to a rectangle
tesseract.rect = CGRectMake(20, 20, 100, 100);
// Optional: Limit recognition time with a few seconds
tesseract.maximumRecognitionTime = 2.0;
// Start the recognition
[tesseract recognize];
// Retrieve the recognized text
NSLog(@"Text:%@", [tesseract recognizedText]);
// You could retrieve more information about recognized text with that methods:
NSArray *characterBoxes = [tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelSymbol];
NSLog(@"characterBoxes:%@", characterBoxes);
NSArray *paragraphs = [tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelParagraph];
NSLog(@"paragraphs:%@", paragraphs);
NSArray *characterChoices = tesseract.characterChoices;
NSLog(@"characterChoices:%@", characterChoices);
UIImage *imageWithBlocks = [tesseract imageWithBlocks:characterBoxes drawText:YES thresholded:NO];
}
@end
When I run my project, I see output as
2015-03-01 12:38:04.888 testImage[45600:70b] Text: 13
53 142 11
2015-03-01 12:38:04.889 testImage[45600:70b] characterBoxes:(
"(2.56%) ' '",
"(74.74%) '1'",
"(69.03%) '3'",
"(89.08%) '5'",
"(72.80%) '3'",
"(22.93%) ' '",
"(78.33%) '1'",
"(67.23%) '4'",
"(70.94%) '2'",
"(15.52%) ' '",
"(80.01%) '1'",
"(68.51%) '1'"
)
2015-03-01 12:38:04.890 testImage[45600:70b] paragraphs:(
"(13.67%) ' 13\n53 142 11\n\n'"
)
2015-03-01 12:38:04.890 testImage[45600:70b] characterChoices:(
(
"(2.56%) ' '"
),
(
"(74.74%) '1'"
),
(
"(69.03%) '3'"
),
(
"(89.08%) '5'"
),
(
"(72.80%) '3'"
),
(
"(22.93%) ' '"
),
(
"(78.33%) '1'"
),
(
"(67.23%) '4'"
),
(
"(70.94%) '2'"
),
(
"(5.45%) ' '"
),
(
"(80.01%) '1'"
),
(
"(68.51%) '1'"
)
)
Which looks no way closer to text on my image (attached)
What am I missing?
first crop image and use black white of tessrect
RépondreSupprimertesseract.image = [ CropppedColorUiImage g8_blackAndWhite];
then read only small parts