Tesseract  3.02
tesseract-ocr/ccmain/cube_control.cpp
Go to the documentation of this file.
00001 /******************************************************************
00002  * File:        cube_control.cpp
00003  * Description: Tesseract class methods for invoking cube convolutional
00004  *              neural network word recognizer.
00005  * Author:      Raquel Romano
00006  * Created:     September 2009
00007  *
00008  **********************************************************************/
00009 
00010 // Include automatically generated configuration file if running autoconf.
00011 #ifdef HAVE_CONFIG_H
00012 #include "config_auto.h"
00013 #endif
00014 
00015 #include "allheaders.h"
00016 
00017 #include "cube_object.h"
00018 #include "cube_reco_context.h"
00019 #include "tesseractclass.h"
00020 #include "tesseract_cube_combiner.h"
00021 
00022 namespace tesseract {
00023 
/**********************************************************************
 * convert_prob_to_tess_certainty
 *
 * Linearly maps a probability onto tesseract's certainty scale:
 * 1.0 becomes 0.0 and 0.0 becomes -20.0. The input is not clamped, so a
 * value outside [0.0, 1.0] yields a result outside [-20.0, 0.0].
 **********************************************************************/
static float convert_prob_to_tess_certainty(float prob) {
  // The arithmetic is carried out in double and narrowed to float on return.
  const double distance_from_one = prob - 1.0;
  return 20.0 * distance_from_one;
}
00033 
00034 /**********************************************************************
00035  * char_box_to_tbox
00036  *
00037  * Create a TBOX from a character bounding box. If nonzero, the
00038  * x_offset accounts for any additional padding of the word box that
00039  * should be taken into account.
00040  *
00041  **********************************************************************/
00042 TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
00043   l_int32 left;
00044   l_int32 top;
00045   l_int32 width;
00046   l_int32 height;
00047   l_int32 right;
00048   l_int32 bottom;
00049 
00050   boxGetGeometry(char_box, &left, &top, &width, &height);
00051   left += word_box.left() - x_offset;
00052   right = left + width;
00053   top = word_box.bottom() + word_box.height() - top;
00054   bottom = top - height;
00055   return TBOX(left, bottom, right, top);
00056 }
00057 
00058 /**********************************************************************
00059  * extract_cube_state
00060  *
00061  * Extract CharSamp objects and character bounding boxes from the
00062  * CubeObject's state. The caller should free both structres.
00063  *
00064 **********************************************************************/
00065 bool Tesseract::extract_cube_state(CubeObject* cube_obj,
00066                                    int* num_chars,
00067                                    Boxa** char_boxes,
00068                                    CharSamp*** char_samples) {
00069   if (!cube_obj) {
00070     if (cube_debug_level > 0) {
00071       tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
00072               "passed to extract_cube_state\n");
00073     }
00074     return false;
00075   }
00076 
00077   // Note that the CubeObject accessors return either the deslanted or
00078   // regular objects search object or beam search object, whichever
00079   // was used in the last call to Recognize()
00080   CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
00081   if (!cube_search_obj) {
00082     if (cube_debug_level > 0) {
00083       tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
00084               "cube's search object in extract_cube_state.\n");
00085     }
00086     return false;
00087   }
00088   BeamSearch *beam_search_obj = cube_obj->BeamObj();
00089   if (!beam_search_obj) {
00090     if (cube_debug_level > 0) {
00091       tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
00092               "cube's beam search object in extract_cube_state.\n");
00093     }
00094     return false;
00095   }
00096 
00097   // Get the character samples and bounding boxes by backtracking
00098   // through the beam search path
00099   int best_node_index = beam_search_obj->BestPresortedNodeIndex();
00100   *char_samples = beam_search_obj->BackTrack(
00101       cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
00102   if (!*char_samples)
00103     return false;
00104   return true;
00105 }
00106 
00107 /**********************************************************************
00108  * create_cube_box_word
00109  *
00110  * Fill the given BoxWord with boxes from character bounding
00111  * boxes. The char_boxes have local coordinates w.r.t. the
00112  * word bounding box, i.e., the left-most character bbox of each word
00113  * has (0,0) left-top coord, but the BoxWord must be defined in page
00114  * coordinates.
00115  **********************************************************************/
00116 bool Tesseract::create_cube_box_word(Boxa *char_boxes,
00117                                      int num_chars,
00118                                      TBOX word_box,
00119                                      BoxWord* box_word) {
00120   if (!box_word) {
00121     if (cube_debug_level > 0) {
00122       tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
00123     }
00124     return false;
00125   }
00126 
00127   // Find the x-coordinate of left-most char_box, which could be
00128   // nonzero if the word image was padded before recognition took place.
00129   int x_offset = -1;
00130   for (int i = 0; i < num_chars; ++i) {
00131     Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
00132     if (x_offset < 0 || char_box->x < x_offset) {
00133       x_offset = char_box->x;
00134     }
00135     boxDestroy(&char_box);
00136   }
00137 
00138   for (int i = 0; i < num_chars; ++i) {
00139     Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
00140     TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
00141     boxDestroy(&char_box);
00142     box_word->InsertBox(i, tbox);
00143   }
00144   return true;
00145 }
00146 
00147 /**********************************************************************
00148  * create_werd_choice
00149  *
00150  **********************************************************************/
00151 static WERD_CHOICE *create_werd_choice(
00152                                        CharSamp** char_samples,
00153                                        int num_chars,
00154                                        const char* str,
00155                                        float certainty,
00156                                        const UNICHARSET &unicharset,
00157                                        CharSet* cube_char_set
00158                                        ) {
00159   // Insert unichar ids into WERD_CHOICE
00160   WERD_CHOICE *werd_choice = new WERD_CHOICE(&unicharset, num_chars);
00161   // within a word, cube recognizes the word in reading order.
00162   werd_choice->set_unichars_in_script_order(true);
00163   ASSERT_HOST(werd_choice != NULL);
00164   UNICHAR_ID uch_id;
00165   for (int i = 0; i < num_chars; ++i) {
00166     uch_id = cube_char_set->UnicharID(char_samples[i]->StrLabel());
00167     if (uch_id != INVALID_UNICHAR_ID)
00168       werd_choice->append_unichar_id_space_allocated(
00169           uch_id, 1, 0.0, certainty);
00170   }
00171 
00172   BLOB_CHOICE *blob_choice;
00173   BLOB_CHOICE_LIST *choices_list;
00174   BLOB_CHOICE_IT choices_list_it;
00175   BLOB_CHOICE_LIST_CLIST *blob_choices = new BLOB_CHOICE_LIST_CLIST();
00176   BLOB_CHOICE_LIST_C_IT blob_choices_it;
00177   blob_choices_it.set_to_list(blob_choices);
00178 
00179   for (int i = 0; i < werd_choice->length(); ++i) {
00180     // Create new BLOB_CHOICE_LIST for this unichar
00181     choices_list = new BLOB_CHOICE_LIST();
00182     choices_list_it.set_to_list(choices_list);
00183     // Add a single BLOB_CHOICE to the list
00184     blob_choice = new BLOB_CHOICE(werd_choice->unichar_id(i),
00185                                   0.0, certainty, -1, -1, 0, 0, 0, false);
00186     choices_list_it.add_after_then_move(blob_choice);
00187     // Add list to the clist
00188     blob_choices_it.add_to_end(choices_list);
00189   }
00190   werd_choice->set_certainty(certainty);
00191   werd_choice->set_blob_choices(blob_choices);
00192   return werd_choice;
00193 }
00194 
00195 /**********************************************************************
00196  * init_cube_objects
00197  *
00198  * Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
00199  * Returns false if cube context could not be created or if load_combiner is
00200  * true, but the combiner could not be loaded.
00201  **********************************************************************/
00202 bool Tesseract::init_cube_objects(bool load_combiner,
00203                                   TessdataManager *tessdata_manager) {
00204   ASSERT_HOST(cube_cntxt_ == NULL);
00205   ASSERT_HOST(tess_cube_combiner_ == NULL);
00206 
00207   // Create the cube context object
00208   cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
00209   if (cube_cntxt_ == NULL) {
00210     if (cube_debug_level > 0) {
00211       tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
00212               "instantiate CubeRecoContext\n");
00213     }
00214     return false;
00215   }
00216 
00217   // Create the combiner object and load the combiner net for target languages.
00218   if (load_combiner) {
00219     tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
00220     if (!tess_cube_combiner_ || !tess_cube_combiner_->LoadCombinerNet()) {
00221       delete cube_cntxt_;
00222       cube_cntxt_ = NULL;
00223       if (tess_cube_combiner_ != NULL) {
00224         delete tess_cube_combiner_;
00225         tess_cube_combiner_ = NULL;
00226       }
00227       if (cube_debug_level > 0)
00228         tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
00229       return false;
00230     }
00231   }
00232   return true;
00233 }
00234 
00235 /**********************************************************************
00236  * run_cube_combiner
00237  *
00238  * Iterates through tesseract's results and calls cube on each word,
00239  * combining the results with the existing tesseract result.
00240  **********************************************************************/
00241 void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
00242   if (page_res == NULL || tess_cube_combiner_ == NULL)
00243     return;
00244   PAGE_RES_IT page_res_it(page_res);
00245   // Iterate through the word results and call cube on each word.
00246   for (page_res_it.restart_page(); page_res_it.word () != NULL;
00247        page_res_it.forward()) {
00248     WERD_RES* word = page_res_it.word();
00249     // Skip cube entirely if tesseract's certainty is greater than threshold.
00250     int combiner_run_thresh = convert_prob_to_tess_certainty(
00251         cube_cntxt_->Params()->CombinerRunThresh());
00252     if (word->best_choice->certainty() >= combiner_run_thresh) {
00253       continue;
00254     }
00255     // Use the same language as Tesseract used for the word.
00256     Tesseract* lang_tess = word->tesseract;
00257 
00258     // Setup a trial WERD_RES in which to classify with cube.
00259     WERD_RES cube_word;
00260     cube_word.InitForRetryRecognition(*word);
00261     CubeObject *cube_obj = lang_tess->cube_recognize_word(
00262         page_res_it.block()->block, &cube_word);
00263     if (cube_obj != NULL)
00264       lang_tess->cube_combine_word(cube_obj, &cube_word, word);
00265     delete cube_obj;
00266   }
00267 }
00268 
00269 /**********************************************************************
00270  * cube_word_pass1
00271  *
00272  * Recognizes a single word using (only) cube. Compatible with
00273  * Tesseract's classify_word_pass1/classify_word_pass2.
00274  **********************************************************************/
00275 void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
00276   CubeObject *cube_obj = cube_recognize_word(block, word);
00277   delete cube_obj;
00278 }
00279 
00280 /**********************************************************************
00281  * cube_recognize_word
00282  *
00283  * Cube recognizer to recognize a single word as with classify_word_pass1
00284  * but also returns the cube object in case the combiner is needed.
00285  **********************************************************************/
00286 CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
00287   if (!cube_binary_ || !cube_cntxt_) {
00288     if (cube_debug_level > 0 && !cube_binary_)
00289       tprintf("Tesseract::run_cube(): NULL binary image.\n");
00290     word->SetupFake(unicharset);
00291     return NULL;
00292   }
00293   TBOX word_box = word->word->bounding_box();
00294   if (block != NULL && (block->re_rotation().x() != 1.0f ||
00295         block->re_rotation().y() != 0.0f)) {
00296     // TODO(rays) We have to rotate the bounding box to get the true coords.
00297     // This will be achieved in the future via DENORM.
00298     // In the mean time, cube can't process this word.
00299     if (cube_debug_level > 0) {
00300       tprintf("Cube can't process rotated word at:");
00301       word_box.print();
00302     }
00303     word->SetupFake(unicharset);
00304     return NULL;
00305   }
00306   CubeObject* cube_obj = new tesseract::CubeObject(
00307       cube_cntxt_, cube_binary_, word_box.left(),
00308       pixGetHeight(cube_binary_) - word_box.top(),
00309       word_box.width(), word_box.height());
00310   if (!cube_recognize(cube_obj, block, word)) {
00311     delete cube_obj;
00312     return NULL;
00313   }
00314   return cube_obj;
00315 }
00316 
00317 /**********************************************************************
00318  * cube_combine_word
00319  *
00320  * Combines the cube and tesseract results for a single word, leaving the
00321  * result in tess_word.
00322  **********************************************************************/
00323 void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
00324                                   WERD_RES* tess_word) {
00325   float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
00326                                                             cube_obj);
00327   // If combiner probability is greater than tess/cube combiner
00328   // classifier threshold, i.e. tesseract wins, then just return the
00329   // tesseract result unchanged, as the combiner knows nothing about how
00330   // correct the answer is. If cube and tesseract agree, then improve the
00331   // scores before returning.
00332   WERD_CHOICE* tess_best = tess_word->best_choice;
00333   WERD_CHOICE* cube_best = cube_word->best_choice;
00334   if (cube_debug_level || classify_debug_level) {
00335     tprintf("Combiner prob = %g vs threshold %g\n",
00336             combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
00337   }
00338   if (combiner_prob >=
00339       cube_cntxt_->Params()->CombinerClassifierThresh()) {
00340     if (tess_best->unichar_string() == cube_best->unichar_string()) {
00341       // Cube and tess agree, so improve the scores.
00342       tess_best->set_rating(tess_best->rating() / 2);
00343       tess_best->set_certainty(tess_best->certainty() / 2);
00344     }
00345     return;
00346   }
00347   // Cube wins.
00348   // It is better for the language combiner to have all tesseract scores,
00349   // so put them in the cube result.
00350   cube_best->set_rating(tess_best->rating());
00351   cube_best->set_certainty(tess_best->certainty());
00352   if (cube_debug_level || classify_debug_level) {
00353     tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
00354             tess_best->unichar_string().string(),
00355             cube_best->unichar_string().string());
00356   }
00357   tess_word->ConsumeWordResults(cube_word);
00358 }
00359 
00360 /**********************************************************************
00361  * cube_recognize
00362  *
00363  * Call cube on the current word, and write the result to word.
00364  * Sets up a fake result and returns false if something goes wrong.
00365  **********************************************************************/
00366 bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
00367                                WERD_RES *word) {
00368   if (!word->SetupForCubeRecognition(unicharset, this, block)) {
00369     return false;  // Graphics block.
00370   }
00371 
00372   // Run cube
00373   WordAltList *cube_alt_list = cube_obj->RecognizeWord();
00374   if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
00375     if (cube_debug_level > 0) {
00376       tprintf("Cube returned nothing for word at:");
00377       word->word->bounding_box().print();
00378     }
00379     word->SetupFake(unicharset);
00380     return false;
00381   }
00382 
00383   // Get cube's best result and its probability, mapped to tesseract's
00384   // certainty range
00385   char_32 *cube_best_32 = cube_alt_list->Alt(0);
00386   double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
00387   float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
00388   string cube_best_str;
00389   CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
00390 
00391   // Retrieve Cube's character bounding boxes and CharSamples,
00392   // corresponding to the most recent call to RecognizeWord().
00393   Boxa *char_boxes = NULL;
00394   CharSamp **char_samples = NULL;;
00395   int num_chars;
00396   if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
00397       && cube_debug_level > 0) {
00398     tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
00399             "cube state.\n");
00400     word->SetupFake(unicharset);
00401     return false;
00402   }
00403 
00404   // Convert cube's character bounding boxes to a BoxWord.
00405   BoxWord cube_box_word;
00406   TBOX tess_word_box = word->word->bounding_box();
00407   if (word->denorm.block() != NULL)
00408     tess_word_box.rotate(word->denorm.block()->re_rotation());
00409   bool box_word_success = create_cube_box_word(char_boxes, num_chars,
00410                                                tess_word_box,
00411                                                &cube_box_word);
00412   boxaDestroy(&char_boxes);
00413   if (!box_word_success) {
00414     if (cube_debug_level > 0) {
00415       tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
00416               "create cube BoxWord\n");
00417     }
00418     word->SetupFake(unicharset);
00419     return false;
00420   }
00421 
00422   // Create cube's best choice.
00423   WERD_CHOICE* cube_werd_choice = create_werd_choice(
00424       char_samples, num_chars, cube_best_str.c_str(), cube_certainty,
00425       unicharset, cube_cntxt_->CharacterSet());
00426   delete []char_samples;
00427 
00428   if (!cube_werd_choice) {
00429     if (cube_debug_level > 0) {
00430       tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
00431               "create cube WERD_CHOICE\n");
00432     }
00433     word->SetupFake(unicharset);
00434     return false;
00435   }
00436   if (cube_debug_level || classify_debug_level) {
00437     tprintf("Cube result: %s r=%g, c=%g\n",
00438             cube_werd_choice->unichar_string().string(),
00439             cube_werd_choice->rating(),
00440             cube_werd_choice->certainty());
00441   }
00442 
00443   // Fill tesseract result's fields with cube results
00444   fill_werd_res(cube_box_word, cube_werd_choice, cube_best_str.c_str(), word);
00445   return true;
00446 }
00447 
00448 /**********************************************************************
00449  * fill_werd_res
00450  *
00451  * Fill Tesseract's word result fields with cube's.
00452  *
00453  **********************************************************************/
00454 void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
00455                               WERD_CHOICE* cube_werd_choice,
00456                               const char* cube_best_str,
00457                               WERD_RES* tess_werd_res) {
00458   // Replace tesseract results's best choice with cube's
00459   tess_werd_res->best_choice = cube_werd_choice;
00460   tess_werd_res->raw_choice = new WERD_CHOICE(*cube_werd_choice);
00461 
00462   delete tess_werd_res->box_word;
00463   tess_werd_res->box_word = new BoxWord(cube_box_word);
00464   tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
00465                                               tess_werd_res->word);
00466   // Fill text and remaining fields
00467   tess_werd_res->word->set_text(cube_best_str);
00468   tess_werd_res->tess_failed = FALSE;
00469   tess_werd_res->tess_accepted =
00470       tess_acceptable_word(tess_werd_res->best_choice,
00471                            tess_werd_res->raw_choice);
00472   // There is no output word, so we can' call AdaptableWord, but then I don't
00473   // think we need to. Fudge the result with accepted.
00474   tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
00475 
00476   // Initialize the reject_map and set it to done, i.e., ignore all of
00477   // tesseract's tests for rejection
00478   tess_werd_res->reject_map.initialise(cube_werd_choice->length());
00479   tess_werd_res->done = tess_werd_res->tess_accepted;
00480 
00481   // Some sanity checks
00482   ASSERT_HOST(tess_werd_res->best_choice->length() ==
00483               tess_werd_res->best_choice->blob_choices()->length());
00484   ASSERT_HOST(tess_werd_res->best_choice->length() ==
00485               tess_werd_res->reject_map.length());
00486 }
00487 
00488 }  // namespace tesseract