Tesseract
3.02
|
00001 /****************************************************************** 00002 * File: cube_control.cpp 00003 * Description: Tesseract class methods for invoking cube convolutional 00004 * neural network word recognizer. 00005 * Author: Raquel Romano 00006 * Created: September 2009 00007 * 00008 **********************************************************************/ 00009 00010 // Include automatically generated configuration file if running autoconf. 00011 #ifdef HAVE_CONFIG_H 00012 #include "config_auto.h" 00013 #endif 00014 00015 #include "allheaders.h" 00016 00017 #include "cube_object.h" 00018 #include "cube_reco_context.h" 00019 #include "tesseractclass.h" 00020 #include "tesseract_cube_combiner.h" 00021 00022 namespace tesseract { 00023 00024 /********************************************************************** 00025 * convert_prob_to_tess_certainty 00026 * 00027 * Normalize a probability in the range [0.0, 1.0] to a tesseract 00028 * certainty in the range [-20.0, 0.0] 00029 **********************************************************************/ 00030 static float convert_prob_to_tess_certainty(float prob) { 00031 return (prob - 1.0) * 20.0; 00032 } 00033 00034 /********************************************************************** 00035 * char_box_to_tbox 00036 * 00037 * Create a TBOX from a character bounding box. If nonzero, the 00038 * x_offset accounts for any additional padding of the word box that 00039 * should be taken into account. 00040 * 00041 **********************************************************************/ 00042 TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) { 00043 l_int32 left; 00044 l_int32 top; 00045 l_int32 width; 00046 l_int32 height; 00047 l_int32 right; 00048 l_int32 bottom; 00049 00050 boxGetGeometry(char_box, &left, &top, &width, &height); 00051 left += word_box.left() - x_offset; 00052 right = left + width; 00053 top = word_box.bottom() + word_box.height() - top; 00054 bottom = top - height; 00055 return TBOX(left, bottom, right, top); 00056 } 00057 00058 /********************************************************************** 00059 * extract_cube_state 00060 * 00061 * Extract CharSamp objects and character bounding boxes from the 00062 * CubeObject's state. The caller should free both structres. 00063 * 00064 **********************************************************************/ 00065 bool Tesseract::extract_cube_state(CubeObject* cube_obj, 00066 int* num_chars, 00067 Boxa** char_boxes, 00068 CharSamp*** char_samples) { 00069 if (!cube_obj) { 00070 if (cube_debug_level > 0) { 00071 tprintf("Cube WARNING (extract_cube_state): Invalid cube object " 00072 "passed to extract_cube_state\n"); 00073 } 00074 return false; 00075 } 00076 00077 // Note that the CubeObject accessors return either the deslanted or 00078 // regular objects search object or beam search object, whichever 00079 // was used in the last call to Recognize() 00080 CubeSearchObject* cube_search_obj = cube_obj->SrchObj(); 00081 if (!cube_search_obj) { 00082 if (cube_debug_level > 0) { 00083 tprintf("Cube WARNING (Extract_cube_state): Could not retrieve " 00084 "cube's search object in extract_cube_state.\n"); 00085 } 00086 return false; 00087 } 00088 BeamSearch *beam_search_obj = cube_obj->BeamObj(); 00089 if (!beam_search_obj) { 00090 if (cube_debug_level > 0) { 00091 tprintf("Cube WARNING (Extract_cube_state): Could not retrieve " 00092 "cube's beam search object in extract_cube_state.\n"); 00093 } 00094 return false; 00095 } 00096 00097 // Get the character samples and bounding boxes by backtracking 00098 // through the beam search path 00099 int best_node_index = beam_search_obj->BestPresortedNodeIndex(); 00100 *char_samples = beam_search_obj->BackTrack( 00101 cube_search_obj, best_node_index, num_chars, NULL, char_boxes); 00102 if (!*char_samples) 00103 return false; 00104 return true; 00105 } 00106 00107 /********************************************************************** 00108 * create_cube_box_word 00109 * 00110 * Fill the given BoxWord with boxes from character bounding 00111 * boxes. The char_boxes have local coordinates w.r.t. the 00112 * word bounding box, i.e., the left-most character bbox of each word 00113 * has (0,0) left-top coord, but the BoxWord must be defined in page 00114 * coordinates. 00115 **********************************************************************/ 00116 bool Tesseract::create_cube_box_word(Boxa *char_boxes, 00117 int num_chars, 00118 TBOX word_box, 00119 BoxWord* box_word) { 00120 if (!box_word) { 00121 if (cube_debug_level > 0) { 00122 tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n"); 00123 } 00124 return false; 00125 } 00126 00127 // Find the x-coordinate of left-most char_box, which could be 00128 // nonzero if the word image was padded before recognition took place. 00129 int x_offset = -1; 00130 for (int i = 0; i < num_chars; ++i) { 00131 Box* char_box = boxaGetBox(char_boxes, i, L_CLONE); 00132 if (x_offset < 0 || char_box->x < x_offset) { 00133 x_offset = char_box->x; 00134 } 00135 boxDestroy(&char_box); 00136 } 00137 00138 for (int i = 0; i < num_chars; ++i) { 00139 Box* char_box = boxaGetBox(char_boxes, i, L_CLONE); 00140 TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset); 00141 boxDestroy(&char_box); 00142 box_word->InsertBox(i, tbox); 00143 } 00144 return true; 00145 } 00146 00147 /********************************************************************** 00148 * create_werd_choice 00149 * 00150 **********************************************************************/ 00151 static WERD_CHOICE *create_werd_choice( 00152 CharSamp** char_samples, 00153 int num_chars, 00154 const char* str, 00155 float certainty, 00156 const UNICHARSET &unicharset, 00157 CharSet* cube_char_set 00158 ) { 00159 // Insert unichar ids into WERD_CHOICE 00160 WERD_CHOICE *werd_choice = new WERD_CHOICE(&unicharset, num_chars); 00161 // within a word, cube recognizes the word in reading order. 00162 werd_choice->set_unichars_in_script_order(true); 00163 ASSERT_HOST(werd_choice != NULL); 00164 UNICHAR_ID uch_id; 00165 for (int i = 0; i < num_chars; ++i) { 00166 uch_id = cube_char_set->UnicharID(char_samples[i]->StrLabel()); 00167 if (uch_id != INVALID_UNICHAR_ID) 00168 werd_choice->append_unichar_id_space_allocated( 00169 uch_id, 1, 0.0, certainty); 00170 } 00171 00172 BLOB_CHOICE *blob_choice; 00173 BLOB_CHOICE_LIST *choices_list; 00174 BLOB_CHOICE_IT choices_list_it; 00175 BLOB_CHOICE_LIST_CLIST *blob_choices = new BLOB_CHOICE_LIST_CLIST(); 00176 BLOB_CHOICE_LIST_C_IT blob_choices_it; 00177 blob_choices_it.set_to_list(blob_choices); 00178 00179 for (int i = 0; i < werd_choice->length(); ++i) { 00180 // Create new BLOB_CHOICE_LIST for this unichar 00181 choices_list = new BLOB_CHOICE_LIST(); 00182 choices_list_it.set_to_list(choices_list); 00183 // Add a single BLOB_CHOICE to the list 00184 blob_choice = new BLOB_CHOICE(werd_choice->unichar_id(i), 00185 0.0, certainty, -1, -1, 0, 0, 0, false); 00186 choices_list_it.add_after_then_move(blob_choice); 00187 // Add list to the clist 00188 blob_choices_it.add_to_end(choices_list); 00189 } 00190 werd_choice->set_certainty(certainty); 00191 werd_choice->set_blob_choices(blob_choices); 00192 return werd_choice; 00193 } 00194 00195 /********************************************************************** 00196 * init_cube_objects 00197 * 00198 * Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner. 00199 * Returns false if cube context could not be created or if load_combiner is 00200 * true, but the combiner could not be loaded. 00201 **********************************************************************/ 00202 bool Tesseract::init_cube_objects(bool load_combiner, 00203 TessdataManager *tessdata_manager) { 00204 ASSERT_HOST(cube_cntxt_ == NULL); 00205 ASSERT_HOST(tess_cube_combiner_ == NULL); 00206 00207 // Create the cube context object 00208 cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset); 00209 if (cube_cntxt_ == NULL) { 00210 if (cube_debug_level > 0) { 00211 tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to " 00212 "instantiate CubeRecoContext\n"); 00213 } 00214 return false; 00215 } 00216 00217 // Create the combiner object and load the combiner net for target languages. 00218 if (load_combiner) { 00219 tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_); 00220 if (!tess_cube_combiner_ || !tess_cube_combiner_->LoadCombinerNet()) { 00221 delete cube_cntxt_; 00222 cube_cntxt_ = NULL; 00223 if (tess_cube_combiner_ != NULL) { 00224 delete tess_cube_combiner_; 00225 tess_cube_combiner_ = NULL; 00226 } 00227 if (cube_debug_level > 0) 00228 tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n"); 00229 return false; 00230 } 00231 } 00232 return true; 00233 } 00234 00235 /********************************************************************** 00236 * run_cube_combiner 00237 * 00238 * Iterates through tesseract's results and calls cube on each word, 00239 * combining the results with the existing tesseract result. 00240 **********************************************************************/ 00241 void Tesseract::run_cube_combiner(PAGE_RES *page_res) { 00242 if (page_res == NULL || tess_cube_combiner_ == NULL) 00243 return; 00244 PAGE_RES_IT page_res_it(page_res); 00245 // Iterate through the word results and call cube on each word. 00246 for (page_res_it.restart_page(); page_res_it.word () != NULL; 00247 page_res_it.forward()) { 00248 WERD_RES* word = page_res_it.word(); 00249 // Skip cube entirely if tesseract's certainty is greater than threshold. 00250 int combiner_run_thresh = convert_prob_to_tess_certainty( 00251 cube_cntxt_->Params()->CombinerRunThresh()); 00252 if (word->best_choice->certainty() >= combiner_run_thresh) { 00253 continue; 00254 } 00255 // Use the same language as Tesseract used for the word. 00256 Tesseract* lang_tess = word->tesseract; 00257 00258 // Setup a trial WERD_RES in which to classify with cube. 00259 WERD_RES cube_word; 00260 cube_word.InitForRetryRecognition(*word); 00261 CubeObject *cube_obj = lang_tess->cube_recognize_word( 00262 page_res_it.block()->block, &cube_word); 00263 if (cube_obj != NULL) 00264 lang_tess->cube_combine_word(cube_obj, &cube_word, word); 00265 delete cube_obj; 00266 } 00267 } 00268 00269 /********************************************************************** 00270 * cube_word_pass1 00271 * 00272 * Recognizes a single word using (only) cube. Compatible with 00273 * Tesseract's classify_word_pass1/classify_word_pass2. 00274 **********************************************************************/ 00275 void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) { 00276 CubeObject *cube_obj = cube_recognize_word(block, word); 00277 delete cube_obj; 00278 } 00279 00280 /********************************************************************** 00281 * cube_recognize_word 00282 * 00283 * Cube recognizer to recognize a single word as with classify_word_pass1 00284 * but also returns the cube object in case the combiner is needed. 00285 **********************************************************************/ 00286 CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) { 00287 if (!cube_binary_ || !cube_cntxt_) { 00288 if (cube_debug_level > 0 && !cube_binary_) 00289 tprintf("Tesseract::run_cube(): NULL binary image.\n"); 00290 word->SetupFake(unicharset); 00291 return NULL; 00292 } 00293 TBOX word_box = word->word->bounding_box(); 00294 if (block != NULL && (block->re_rotation().x() != 1.0f || 00295 block->re_rotation().y() != 0.0f)) { 00296 // TODO(rays) We have to rotate the bounding box to get the true coords. 00297 // This will be achieved in the future via DENORM. 00298 // In the mean time, cube can't process this word. 00299 if (cube_debug_level > 0) { 00300 tprintf("Cube can't process rotated word at:"); 00301 word_box.print(); 00302 } 00303 word->SetupFake(unicharset); 00304 return NULL; 00305 } 00306 CubeObject* cube_obj = new tesseract::CubeObject( 00307 cube_cntxt_, cube_binary_, word_box.left(), 00308 pixGetHeight(cube_binary_) - word_box.top(), 00309 word_box.width(), word_box.height()); 00310 if (!cube_recognize(cube_obj, block, word)) { 00311 delete cube_obj; 00312 return NULL; 00313 } 00314 return cube_obj; 00315 } 00316 00317 /********************************************************************** 00318 * cube_combine_word 00319 * 00320 * Combines the cube and tesseract results for a single word, leaving the 00321 * result in tess_word. 00322 **********************************************************************/ 00323 void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word, 00324 WERD_RES* tess_word) { 00325 float combiner_prob = tess_cube_combiner_->CombineResults(tess_word, 00326 cube_obj); 00327 // If combiner probability is greater than tess/cube combiner 00328 // classifier threshold, i.e. tesseract wins, then just return the 00329 // tesseract result unchanged, as the combiner knows nothing about how 00330 // correct the answer is. If cube and tesseract agree, then improve the 00331 // scores before returning. 00332 WERD_CHOICE* tess_best = tess_word->best_choice; 00333 WERD_CHOICE* cube_best = cube_word->best_choice; 00334 if (cube_debug_level || classify_debug_level) { 00335 tprintf("Combiner prob = %g vs threshold %g\n", 00336 combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh()); 00337 } 00338 if (combiner_prob >= 00339 cube_cntxt_->Params()->CombinerClassifierThresh()) { 00340 if (tess_best->unichar_string() == cube_best->unichar_string()) { 00341 // Cube and tess agree, so improve the scores. 00342 tess_best->set_rating(tess_best->rating() / 2); 00343 tess_best->set_certainty(tess_best->certainty() / 2); 00344 } 00345 return; 00346 } 00347 // Cube wins. 00348 // It is better for the language combiner to have all tesseract scores, 00349 // so put them in the cube result. 00350 cube_best->set_rating(tess_best->rating()); 00351 cube_best->set_certainty(tess_best->certainty()); 00352 if (cube_debug_level || classify_debug_level) { 00353 tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n", 00354 tess_best->unichar_string().string(), 00355 cube_best->unichar_string().string()); 00356 } 00357 tess_word->ConsumeWordResults(cube_word); 00358 } 00359 00360 /********************************************************************** 00361 * cube_recognize 00362 * 00363 * Call cube on the current word, and write the result to word. 00364 * Sets up a fake result and returns false if something goes wrong. 00365 **********************************************************************/ 00366 bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block, 00367 WERD_RES *word) { 00368 if (!word->SetupForCubeRecognition(unicharset, this, block)) { 00369 return false; // Graphics block. 00370 } 00371 00372 // Run cube 00373 WordAltList *cube_alt_list = cube_obj->RecognizeWord(); 00374 if (!cube_alt_list || cube_alt_list->AltCount() <= 0) { 00375 if (cube_debug_level > 0) { 00376 tprintf("Cube returned nothing for word at:"); 00377 word->word->bounding_box().print(); 00378 } 00379 word->SetupFake(unicharset); 00380 return false; 00381 } 00382 00383 // Get cube's best result and its probability, mapped to tesseract's 00384 // certainty range 00385 char_32 *cube_best_32 = cube_alt_list->Alt(0); 00386 double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0)); 00387 float cube_certainty = convert_prob_to_tess_certainty(cube_prob); 00388 string cube_best_str; 00389 CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str); 00390 00391 // Retrieve Cube's character bounding boxes and CharSamples, 00392 // corresponding to the most recent call to RecognizeWord(). 00393 Boxa *char_boxes = NULL; 00394 CharSamp **char_samples = NULL;; 00395 int num_chars; 00396 if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples) 00397 && cube_debug_level > 0) { 00398 tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract " 00399 "cube state.\n"); 00400 word->SetupFake(unicharset); 00401 return false; 00402 } 00403 00404 // Convert cube's character bounding boxes to a BoxWord. 00405 BoxWord cube_box_word; 00406 TBOX tess_word_box = word->word->bounding_box(); 00407 if (word->denorm.block() != NULL) 00408 tess_word_box.rotate(word->denorm.block()->re_rotation()); 00409 bool box_word_success = create_cube_box_word(char_boxes, num_chars, 00410 tess_word_box, 00411 &cube_box_word); 00412 boxaDestroy(&char_boxes); 00413 if (!box_word_success) { 00414 if (cube_debug_level > 0) { 00415 tprintf("Cube WARNING (Tesseract::cube_recognize): Could not " 00416 "create cube BoxWord\n"); 00417 } 00418 word->SetupFake(unicharset); 00419 return false; 00420 } 00421 00422 // Create cube's best choice. 00423 WERD_CHOICE* cube_werd_choice = create_werd_choice( 00424 char_samples, num_chars, cube_best_str.c_str(), cube_certainty, 00425 unicharset, cube_cntxt_->CharacterSet()); 00426 delete []char_samples; 00427 00428 if (!cube_werd_choice) { 00429 if (cube_debug_level > 0) { 00430 tprintf("Cube WARNING (Tesseract::cube_recognize): Could not " 00431 "create cube WERD_CHOICE\n"); 00432 } 00433 word->SetupFake(unicharset); 00434 return false; 00435 } 00436 if (cube_debug_level || classify_debug_level) { 00437 tprintf("Cube result: %s r=%g, c=%g\n", 00438 cube_werd_choice->unichar_string().string(), 00439 cube_werd_choice->rating(), 00440 cube_werd_choice->certainty()); 00441 } 00442 00443 // Fill tesseract result's fields with cube results 00444 fill_werd_res(cube_box_word, cube_werd_choice, cube_best_str.c_str(), word); 00445 return true; 00446 } 00447 00448 /********************************************************************** 00449 * fill_werd_res 00450 * 00451 * Fill Tesseract's word result fields with cube's. 00452 * 00453 **********************************************************************/ 00454 void Tesseract::fill_werd_res(const BoxWord& cube_box_word, 00455 WERD_CHOICE* cube_werd_choice, 00456 const char* cube_best_str, 00457 WERD_RES* tess_werd_res) { 00458 // Replace tesseract results's best choice with cube's 00459 tess_werd_res->best_choice = cube_werd_choice; 00460 tess_werd_res->raw_choice = new WERD_CHOICE(*cube_werd_choice); 00461 00462 delete tess_werd_res->box_word; 00463 tess_werd_res->box_word = new BoxWord(cube_box_word); 00464 tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(), 00465 tess_werd_res->word); 00466 // Fill text and remaining fields 00467 tess_werd_res->word->set_text(cube_best_str); 00468 tess_werd_res->tess_failed = FALSE; 00469 tess_werd_res->tess_accepted = 00470 tess_acceptable_word(tess_werd_res->best_choice, 00471 tess_werd_res->raw_choice); 00472 // There is no output word, so we can' call AdaptableWord, but then I don't 00473 // think we need to. Fudge the result with accepted. 00474 tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted; 00475 00476 // Initialize the reject_map and set it to done, i.e., ignore all of 00477 // tesseract's tests for rejection 00478 tess_werd_res->reject_map.initialise(cube_werd_choice->length()); 00479 tess_werd_res->done = tess_werd_res->tess_accepted; 00480 00481 // Some sanity checks 00482 ASSERT_HOST(tess_werd_res->best_choice->length() == 00483 tess_werd_res->best_choice->blob_choices()->length()); 00484 ASSERT_HOST(tess_werd_res->best_choice->length() == 00485 tess_werd_res->reject_map.length()); 00486 } 00487 00488 } // namespace tesseract