Tesseract  3.02
tesseract-ocr/cube/char_samp.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        char_samp.cpp
00003  * Description: Implementation of a Character Bitmap Sample Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <string.h>
00021 #include <string>
00022 #include "char_samp.h"
00023 #include "cube_utils.h"
00024 
00025 namespace tesseract {
00026 
00027 #define MAX_LINE_LEN  1024
00028 
00029 CharSamp::CharSamp()
00030     : Bmp8(0, 0) {
00031   left_ = 0;
00032   top_ = 0;
00033   label32_ = NULL;
00034   page_ = -1;
00035 }
00036 
00037 CharSamp::CharSamp(int wid, int hgt)
00038     : Bmp8(wid, hgt) {
00039   left_ = 0;
00040   top_ = 0;
00041   label32_ = NULL;
00042   page_ = -1;
00043 }
00044 
00045 CharSamp::CharSamp(int left, int top, int wid, int hgt)
00046     : Bmp8(wid, hgt)
00047     , left_(left)
00048     , top_(top) {
00049   label32_ = NULL;
00050   page_ = -1;
00051 }
00052 
00053 CharSamp::~CharSamp() {
00054   if (label32_ != NULL) {
00055     delete []label32_;
00056     label32_ = NULL;
00057   }
00058 }
00059 
00060 // returns a UTF-8 version of the string label
00061 string CharSamp::stringLabel() const {
00062   string str = "";
00063   if (label32_ != NULL) {
00064     string_32 str32(label32_);
00065     CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
00066   }
00067   return str;
00068 }
00069 
00070 // set a the string label using a UTF encoded string
00071 void CharSamp::SetLabel(string str) {
00072   if (label32_ != NULL) {
00073     delete []label32_;
00074     label32_ = NULL;
00075   }
00076   string_32 str32;
00077   CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
00078   SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
00079 }
00080 
00081 // creates a CharSamp object from file
00082 CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) {
00083   unsigned short left;
00084   unsigned short top;
00085   unsigned short page;
00086   unsigned short first_char;
00087   unsigned short last_char;
00088   unsigned short norm_top;
00089   unsigned short norm_bottom;
00090   unsigned short norm_aspect_ratio;
00091   unsigned int val32;
00092 
00093   char_32 *label32;
00094 
00095   // read and check 32 bit marker
00096   if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
00097     return NULL;
00098   }
00099   if (val32 != 0xabd0fefe) {
00100     return NULL;
00101   }
00102   // read label length,
00103   if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
00104     return NULL;
00105   }
00106   // the label is not null terminated in the file
00107   if (val32 > 0) {
00108     label32 = new char_32[val32 + 1];
00109     if (label32 == NULL) {
00110       return NULL;
00111     }
00112     // read label
00113     if (fp->Read(label32, val32 * sizeof(*label32)) !=
00114         (val32 * sizeof(*label32))) {
00115       return NULL;
00116     }
00117     // null terminate
00118     label32[val32] = 0;
00119   } else {
00120     label32 = NULL;
00121   }
00122   // read coordinates
00123   if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
00124     return NULL;
00125   }
00126   if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
00127     return NULL;
00128   }
00129   if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
00130     return NULL;
00131   }
00132   if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
00133     return NULL;
00134   }
00135   if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
00136     return NULL;
00137   }
00138   if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
00139     return NULL;
00140   }
00141   if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
00142     return NULL;
00143   }
00144   if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
00145       sizeof(norm_aspect_ratio)) {
00146     return NULL;
00147   }
00148   // create the object
00149   CharSamp *char_samp = new CharSamp();
00150   if (char_samp == NULL) {
00151     return NULL;
00152   }
00153   // init
00154   char_samp->label32_ = label32;
00155   char_samp->page_ = page;
00156   char_samp->left_ = left;
00157   char_samp->top_ = top;
00158   char_samp->first_char_ = first_char;
00159   char_samp->last_char_ = last_char;
00160   char_samp->norm_top_ = norm_top;
00161   char_samp->norm_bottom_ = norm_bottom;
00162   char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
00163   // load the Bmp8 part
00164   if (char_samp->LoadFromCharDumpFile(fp) == false) {
00165     delete char_samp;
00166     return NULL;
00167   }
00168   return char_samp;
00169 }
00170 
00171 // Load a Char Samp from a dump file
00172 CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
00173   unsigned short left;
00174   unsigned short top;
00175   unsigned short page;
00176   unsigned short first_char;
00177   unsigned short last_char;
00178   unsigned short norm_top;
00179   unsigned short norm_bottom;
00180   unsigned short norm_aspect_ratio;
00181   unsigned int val32;
00182   char_32 *label32;
00183 
00184   // read and check 32 bit marker
00185   if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00186     return NULL;
00187   }
00188   if (val32 != 0xabd0fefe) {
00189     return NULL;
00190   }
00191   // read label length,
00192   if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00193     return NULL;
00194   }
00195   // the label is not null terminated in the file
00196   if (val32 > 0) {
00197     label32 = new char_32[val32 + 1];
00198     if (label32 == NULL) {
00199       return NULL;
00200     }
00201     // read label
00202     if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
00203         (val32 * sizeof(*label32))) {
00204       return NULL;
00205     }
00206     // null terminate
00207     label32[val32] = 0;
00208   } else {
00209     label32 = NULL;
00210   }
00211   // read coordinates
00212   if (fread(&page, 1, sizeof(page), fp) != sizeof(page)) {
00213     return NULL;
00214   }
00215   if (fread(&left, 1, sizeof(left), fp) != sizeof(left)) {
00216     return NULL;
00217   }
00218   if (fread(&top, 1, sizeof(top), fp) != sizeof(top)) {
00219     return NULL;
00220   }
00221   if (fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char)) {
00222     return NULL;
00223   }
00224   if (fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char)) {
00225     return NULL;
00226   }
00227   if (fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top)) {
00228     return NULL;
00229   }
00230   if (fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom)) {
00231     return NULL;
00232   }
00233   if (fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
00234       sizeof(norm_aspect_ratio)) {
00235     return NULL;
00236   }
00237   // create the object
00238   CharSamp *char_samp = new CharSamp();
00239   if (char_samp == NULL) {
00240     return NULL;
00241   }
00242   // init
00243   char_samp->label32_ = label32;
00244   char_samp->page_ = page;
00245   char_samp->left_ = left;
00246   char_samp->top_ = top;
00247   char_samp->first_char_ = first_char;
00248   char_samp->last_char_ = last_char;
00249   char_samp->norm_top_ = norm_top;
00250   char_samp->norm_bottom_ = norm_bottom;
00251   char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
00252   // load the Bmp8 part
00253   if (char_samp->LoadFromCharDumpFile(fp) == false) {
00254     return NULL;
00255   }
00256   return char_samp;
00257 }
00258 
00259 // returns a copy of the charsamp that is scaled to the
00260 // specified width and height
00261 CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) {
00262   CharSamp *scaled_samp = new CharSamp(wid, hgt);
00263   if (scaled_samp == NULL) {
00264     return NULL;
00265   }
00266   if (scaled_samp->ScaleFrom(this, isotropic) == false) {
00267     delete scaled_samp;
00268     return NULL;
00269   }
00270   scaled_samp->left_ = left_;
00271   scaled_samp->top_ = top_;
00272   scaled_samp->page_ = page_;
00273   scaled_samp->SetLabel(label32_);
00274   scaled_samp->first_char_ = first_char_;
00275   scaled_samp->last_char_ = last_char_;
00276   scaled_samp->norm_top_ = norm_top_;
00277   scaled_samp->norm_bottom_ = norm_bottom_;
00278   scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
00279   return scaled_samp;
00280 }
00281 
00282 // Load a Char Samp from a dump file
00283 CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt,
00284                                 unsigned char *data) {
00285   // create the object
00286   CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
00287   if (char_samp == NULL) {
00288     return NULL;
00289   }
00290   if (char_samp->LoadFromRawData(data) == false) {
00291     delete char_samp;
00292     return NULL;
00293   }
00294   return char_samp;
00295 }
00296 
00297 // Saves the charsamp to a dump file
00298 bool CharSamp::Save2CharDumpFile(FILE *fp) const {
00299   unsigned int val32;
00300   // write and check 32 bit marker
00301   val32 = 0xabd0fefe;
00302   if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00303     return false;
00304   }
00305   // write label length
00306   val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
00307   if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00308     return false;
00309   }
00310   // write label
00311   if (label32_ != NULL) {
00312     if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
00313         (val32 * sizeof(*label32_))) {
00314       return false;
00315     }
00316   }
00317   // write coordinates
00318   if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
00319     return false;
00320   }
00321   if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
00322     return false;
00323   }
00324   if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
00325     return false;
00326   }
00327   if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
00328       sizeof(first_char_)) {
00329     return false;
00330   }
00331   if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
00332     return false;
00333   }
00334   if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
00335     return false;
00336   }
00337   if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
00338       sizeof(norm_bottom_)) {
00339     return false;
00340   }
00341   if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
00342       sizeof(norm_aspect_ratio_)) {
00343     return false;
00344   }
00345   if (SaveBmp2CharDumpFile(fp) == false) {
00346     return false;
00347   }
00348   return true;
00349 }
00350 
00351 // Crop the char samp such that there are no white spaces on any side.
00352 // The norm_top_ and norm_bottom_ fields are the character top/bottom
00353 // with respect to whatever context the character is being recognized
00354 // in (e.g. word bounding box) normalized to a standard size of
00355 // 255. Here they default to 0 and 255 (word box boundaries), but
00356 // since they are context dependent, they may need to be reset by the
00357 // calling function.
00358 CharSamp *CharSamp::Crop() {
00359   // get the dimesions of the cropped img
00360   int cropped_left = 0;
00361   int cropped_top = 0;
00362   int cropped_wid = wid_;
00363   int cropped_hgt = hgt_;
00364   Bmp8::Crop(&cropped_left, &cropped_top,
00365              &cropped_wid, &cropped_hgt);
00366 
00367   if (cropped_wid == 0 || cropped_hgt == 0) {
00368     return NULL;
00369   }
00370   // create the cropped char samp
00371   CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
00372                                         top_ + cropped_top,
00373                                         cropped_wid, cropped_hgt);
00374   cropped_samp->SetLabel(label32_);
00375   cropped_samp->SetFirstChar(first_char_);
00376   cropped_samp->SetLastChar(last_char_);
00377   // the following 3 fields may/should be reset by the calling function
00378   // using context information, i.e., location of character box
00379   // w.r.t. the word bounding box
00380   cropped_samp->SetNormAspectRatio(255 *
00381                                    cropped_wid / (cropped_wid + cropped_hgt));
00382   cropped_samp->SetNormTop(0);
00383   cropped_samp->SetNormBottom(255);
00384 
00385   // copy the bitmap to the cropped img
00386   Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
00387   return cropped_samp;
00388 }
00389 
00390 // segment the char samp to connected components
00391 // based on contiguity and vertical pixel density histogram
00392 ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left,
00393                             int max_hist_wnd, int min_con_comp_size) const {
00394   // init
00395   (*segment_cnt) = 0;
00396   int concomp_cnt = 0;
00397   int seg_cnt = 0;
00398   // find the concomps of the image
00399   ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
00400   if (concomp_cnt <= 0 || !concomp_array) {
00401     if (concomp_array)
00402       delete []concomp_array;
00403     return NULL;
00404   }
00405   ConComp **seg_array = NULL;
00406   // segment each concomp further using vertical histogram
00407   for (int concomp = 0; concomp < concomp_cnt; concomp++) {
00408     int concomp_seg_cnt = 0;
00409     // segment the concomp
00410     ConComp **concomp_seg_array = NULL;
00411     ConComp **concomp_alloc_seg =
00412         concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
00413     // no segments, add the whole concomp
00414     if (concomp_alloc_seg == NULL) {
00415       concomp_seg_cnt = 1;
00416       concomp_seg_array = concomp_array + concomp;
00417     } else {
00418       // delete the original concomp, we no longer need it
00419       concomp_seg_array = concomp_alloc_seg;
00420       delete concomp_array[concomp];
00421     }
00422     // add the resulting segments
00423     for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
00424       // too small of a segment: ignore
00425       if (concomp_seg_array[seg_idx]->Width() < 2 &&
00426           concomp_seg_array[seg_idx]->Height() < 2) {
00427         delete concomp_seg_array[seg_idx];
00428       } else {
00429         // add the new segment
00430         // extend the segment array
00431         if ((seg_cnt % kConCompAllocChunk) == 0) {
00432           ConComp **temp_segm_array =
00433               new ConComp *[seg_cnt + kConCompAllocChunk];
00434           if (temp_segm_array == NULL) {
00435             fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not "
00436                     "allocate additional connected components\n");
00437             delete []concomp_seg_array;
00438             delete []concomp_array;
00439             delete []seg_array;
00440             return NULL;
00441           }
00442           if (seg_cnt > 0) {
00443             memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
00444             delete []seg_array;
00445           }
00446           seg_array = temp_segm_array;
00447         }
00448         seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
00449       }
00450     }  // segment
00451     if (concomp_alloc_seg != NULL) {
00452       delete []concomp_alloc_seg;
00453     }
00454   }  // concomp
00455   delete []concomp_array;
00456 
00457   // sort the concomps from Left2Right or Right2Left, based on the reading order
00458   if (seg_cnt > 0 && seg_array != NULL) {
00459     qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
00460         ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
00461   }
00462   (*segment_cnt) = seg_cnt;
00463   return seg_array;
00464 }
00465 
00466 // builds a char samp from a set of connected components
00467 CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
00468                                  int seg_flags_size, int *seg_flags,
00469                                  bool *left_most, bool *right_most,
00470                                  int word_hgt) {
00471   int concomp;
00472   int end_concomp;
00473   int concomp_cnt = 0;
00474   end_concomp = strt_concomp + seg_flags_size;
00475   // determine ID range
00476   bool once = false;
00477   int min_id = -1;
00478   int max_id = -1;
00479   for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
00480     if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
00481       if (!once) {
00482         min_id = concomp_array[concomp]->ID();
00483         max_id = concomp_array[concomp]->ID();
00484         once = true;
00485       } else {
00486         UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
00487       }
00488       concomp_cnt++;
00489     }
00490   }
00491   if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
00492     return NULL;
00493   }
00494   // alloc memo for computing leftmost and right most attributes
00495   int id_cnt = max_id - min_id + 1;
00496   bool *id_exist = new bool[id_cnt];
00497   bool *left_most_exist = new bool[id_cnt];
00498   bool *right_most_exist = new bool[id_cnt];
00499   if (!id_exist || !left_most_exist || !right_most_exist)
00500     return NULL;
00501   memset(id_exist, 0, id_cnt * sizeof(*id_exist));
00502   memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
00503   memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
00504   // find the dimensions of the charsamp
00505   once = false;
00506   int left = -1;
00507   int right = -1;
00508   int top = -1;
00509   int bottom = -1;
00510   int unq_ids = 0;
00511   int unq_left_most = 0;
00512   int unq_right_most = 0;
00513   for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
00514     if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
00515       if (!once) {
00516         left = concomp_array[concomp]->Left();
00517         right = concomp_array[concomp]->Right();
00518         top = concomp_array[concomp]->Top();
00519         bottom = concomp_array[concomp]->Bottom();
00520         once = true;
00521       } else {
00522         UpdateRange(concomp_array[concomp]->Left(),
00523                     concomp_array[concomp]->Right(), &left, &right);
00524         UpdateRange(concomp_array[concomp]->Top(),
00525                     concomp_array[concomp]->Bottom(), &top, &bottom);
00526       }
00527       // count unq ids, unq left most and right mosts ids
00528       int concomp_id = concomp_array[concomp]->ID() - min_id;
00529       if (!id_exist[concomp_id]) {
00530         id_exist[concomp_id] = true;
00531         unq_ids++;
00532       }
00533       if (concomp_array[concomp]->LeftMost()) {
00534         if (left_most_exist[concomp_id] == false) {
00535           left_most_exist[concomp_id] = true;
00536           unq_left_most++;
00537         }
00538       }
00539       if (concomp_array[concomp]->RightMost()) {
00540         if (right_most_exist[concomp_id] == false) {
00541           right_most_exist[concomp_id] = true;
00542           unq_right_most++;
00543         }
00544       }
00545     }
00546   }
00547   delete []id_exist;
00548   delete []left_most_exist;
00549   delete []right_most_exist;
00550   if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
00551     return NULL;
00552   }
00553   (*left_most) = (unq_left_most >= unq_ids);
00554   (*right_most) = (unq_right_most >= unq_ids);
00555   // create the char sample object
00556   CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
00557   if (!samp) {
00558     return NULL;
00559   }
00560 
00561   // set the foreground pixels
00562   for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
00563     if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
00564       ConCompPt *pt_ptr = concomp_array[concomp]->Head();
00565       while (pt_ptr) {
00566         samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
00567         pt_ptr = pt_ptr->Next();
00568       }
00569     }
00570   }
00571   return samp;
00572 }
00573 
00574 // clones the object
00575 CharSamp *CharSamp::Clone() const {
00576   // create the cropped char samp
00577   CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
00578   samp->SetLabel(label32_);
00579   samp->SetFirstChar(first_char_);
00580   samp->SetLastChar(last_char_);
00581   samp->SetNormTop(norm_top_);
00582   samp->SetNormBottom(norm_bottom_);
00583   samp->SetNormAspectRatio(norm_aspect_ratio_);
00584   // copy the bitmap to the cropped img
00585   Copy(0, 0, wid_, hgt_, samp);
00586   return samp;
00587 }
00588 
00589 // Load a Char Samp from a dump file
00590 CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
00591   unsigned int val32;
00592   char_32 *label32;
00593   unsigned char *raw_data = *raw_data_ptr;
00594 
00595   // read and check 32 bit marker
00596   memcpy(&val32, raw_data, sizeof(val32));
00597   raw_data += sizeof(val32);
00598   if (val32 != 0xabd0fefe) {
00599     return NULL;
00600   }
00601   // read label length,
00602   memcpy(&val32, raw_data, sizeof(val32));
00603   raw_data += sizeof(val32);
00604   // the label is not null terminated in the file
00605   if (val32 > 0) {
00606     label32 = new char_32[val32 + 1];
00607     if (label32 == NULL) {
00608       return NULL;
00609     }
00610     // read label
00611     memcpy(label32, raw_data, val32 * sizeof(*label32));
00612     raw_data += (val32 * sizeof(*label32));
00613     // null terminate
00614     label32[val32] = 0;
00615   } else {
00616     label32 = NULL;
00617   }
00618 
00619   // create the object
00620   CharSamp *char_samp = new CharSamp();
00621   if (char_samp == NULL) {
00622     return NULL;
00623   }
00624 
00625   // read coordinates
00626   char_samp->label32_ = label32;
00627   memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
00628   raw_data += sizeof(char_samp->page_);
00629   memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
00630   raw_data += sizeof(char_samp->left_);
00631   memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
00632   raw_data += sizeof(char_samp->top_);
00633   memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
00634   raw_data += sizeof(char_samp->first_char_);
00635   memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
00636   raw_data += sizeof(char_samp->last_char_);
00637   memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
00638   raw_data += sizeof(char_samp->norm_top_);
00639   memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
00640   raw_data += sizeof(char_samp->norm_bottom_);
00641   memcpy(&char_samp->norm_aspect_ratio_, raw_data,
00642          sizeof(char_samp->norm_aspect_ratio_));
00643   raw_data += sizeof(char_samp->norm_aspect_ratio_);
00644 
00645   // load the Bmp8 part
00646   if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
00647     delete char_samp;
00648     return NULL;
00649   }
00650 
00651   (*raw_data_ptr) = raw_data;
00652   return char_samp;
00653 }
00654 
00655 // computes the features corresponding to the char sample
00656 bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) {
00657   // Create a scaled BMP
00658   CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
00659   if (!scaled_bmp) {
00660     return false;
00661   }
00662   // prepare input
00663   unsigned char *buff = scaled_bmp->RawData();
00664   // bitmap features
00665   int input;
00666   int bmp_size = conv_grid_size * conv_grid_size;
00667   for (input = 0; input < bmp_size; input++) {
00668     features[input] = 255.0f - (1.0f * buff[input]);
00669   }
00670   // word context features
00671   features[input++] = FirstChar();
00672   features[input++] = LastChar();
00673   features[input++] = NormTop();
00674   features[input++] = NormBottom();
00675   features[input++] = NormAspectRatio();
00676   delete scaled_bmp;
00677   return true;
00678 }
00679 }  // namespace tesseract