Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: char_samp.cpp 00003 * Description: Implementation of a Character Bitmap Sample Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <string.h> 00021 #include <string> 00022 #include "char_samp.h" 00023 #include "cube_utils.h" 00024 00025 namespace tesseract { 00026 00027 #define MAX_LINE_LEN 1024 00028 00029 CharSamp::CharSamp() 00030 : Bmp8(0, 0) { 00031 left_ = 0; 00032 top_ = 0; 00033 label32_ = NULL; 00034 page_ = -1; 00035 } 00036 00037 CharSamp::CharSamp(int wid, int hgt) 00038 : Bmp8(wid, hgt) { 00039 left_ = 0; 00040 top_ = 0; 00041 label32_ = NULL; 00042 page_ = -1; 00043 } 00044 00045 CharSamp::CharSamp(int left, int top, int wid, int hgt) 00046 : Bmp8(wid, hgt) 00047 , left_(left) 00048 , top_(top) { 00049 label32_ = NULL; 00050 page_ = -1; 00051 } 00052 00053 CharSamp::~CharSamp() { 00054 if (label32_ != NULL) { 00055 delete []label32_; 00056 label32_ = NULL; 00057 } 00058 } 00059 00060 // returns a UTF-8 version of the string label 00061 string CharSamp::stringLabel() const { 00062 string str = ""; 00063 if (label32_ != NULL) { 00064 string_32 str32(label32_); 00065 CubeUtils::UTF32ToUTF8(str32.c_str(), &str); 00066 } 00067 return str; 00068 } 00069 00070 // set a the string label using a UTF encoded string 00071 void CharSamp::SetLabel(string str) { 00072 if (label32_ != NULL) { 00073 delete []label32_; 00074 label32_ = NULL; 00075 } 00076 string_32 str32; 00077 CubeUtils::UTF8ToUTF32(str.c_str(), &str32); 00078 SetLabel(reinterpret_cast<const char_32 *>(str32.c_str())); 00079 } 00080 00081 // creates a CharSamp object from file 00082 CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { 00083 unsigned short left; 00084 unsigned short top; 00085 unsigned short page; 00086 unsigned short first_char; 00087 unsigned short last_char; 00088 unsigned short norm_top; 00089 unsigned short norm_bottom; 00090 unsigned short norm_aspect_ratio; 00091 unsigned int val32; 00092 00093 char_32 *label32; 00094 00095 // read and check 32 bit marker 00096 if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) { 00097 return NULL; 00098 } 00099 if (val32 != 0xabd0fefe) { 00100 return NULL; 00101 } 00102 // read label length, 00103 if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) { 00104 return NULL; 00105 } 00106 // the label is not null terminated in the file 00107 if (val32 > 0) { 00108 label32 = new char_32[val32 + 1]; 00109 if (label32 == NULL) { 00110 return NULL; 00111 } 00112 // read label 00113 if (fp->Read(label32, val32 * sizeof(*label32)) != 00114 (val32 * sizeof(*label32))) { 00115 return NULL; 00116 } 00117 // null terminate 00118 label32[val32] = 0; 00119 } else { 00120 label32 = NULL; 00121 } 00122 // read coordinates 00123 if (fp->Read(&page, sizeof(page)) != sizeof(page)) { 00124 return NULL; 00125 } 00126 if (fp->Read(&left, sizeof(left)) != sizeof(left)) { 00127 return NULL; 00128 } 00129 if (fp->Read(&top, sizeof(top)) != sizeof(top)) { 00130 return NULL; 00131 } 00132 if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) { 00133 return NULL; 00134 } 00135 if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) { 00136 return NULL; 00137 } 00138 if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) { 00139 return NULL; 00140 } 00141 if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) { 00142 return NULL; 00143 } 00144 if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) != 00145 sizeof(norm_aspect_ratio)) { 00146 return NULL; 00147 } 00148 // create the object 00149 CharSamp *char_samp = new CharSamp(); 00150 if (char_samp == NULL) { 00151 return NULL; 00152 } 00153 // init 00154 char_samp->label32_ = label32; 00155 char_samp->page_ = page; 00156 char_samp->left_ = left; 00157 char_samp->top_ = top; 00158 char_samp->first_char_ = first_char; 00159 char_samp->last_char_ = last_char; 00160 char_samp->norm_top_ = norm_top; 00161 char_samp->norm_bottom_ = norm_bottom; 00162 char_samp->norm_aspect_ratio_ = norm_aspect_ratio; 00163 // load the Bmp8 part 00164 if (char_samp->LoadFromCharDumpFile(fp) == false) { 00165 delete char_samp; 00166 return NULL; 00167 } 00168 return char_samp; 00169 } 00170 00171 // Load a Char Samp from a dump file 00172 CharSamp *CharSamp::FromCharDumpFile(FILE *fp) { 00173 unsigned short left; 00174 unsigned short top; 00175 unsigned short page; 00176 unsigned short first_char; 00177 unsigned short last_char; 00178 unsigned short norm_top; 00179 unsigned short norm_bottom; 00180 unsigned short norm_aspect_ratio; 00181 unsigned int val32; 00182 char_32 *label32; 00183 00184 // read and check 32 bit marker 00185 if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00186 return NULL; 00187 } 00188 if (val32 != 0xabd0fefe) { 00189 return NULL; 00190 } 00191 // read label length, 00192 if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00193 return NULL; 00194 } 00195 // the label is not null terminated in the file 00196 if (val32 > 0) { 00197 label32 = new char_32[val32 + 1]; 00198 if (label32 == NULL) { 00199 return NULL; 00200 } 00201 // read label 00202 if (fread(label32, 1, val32 * sizeof(*label32), fp) != 00203 (val32 * sizeof(*label32))) { 00204 return NULL; 00205 } 00206 // null terminate 00207 label32[val32] = 0; 00208 } else { 00209 label32 = NULL; 00210 } 00211 // read coordinates 00212 if (fread(&page, 1, sizeof(page), fp) != sizeof(page)) { 00213 return NULL; 00214 } 00215 if (fread(&left, 1, sizeof(left), fp) != sizeof(left)) { 00216 return NULL; 00217 } 00218 if (fread(&top, 1, sizeof(top), fp) != sizeof(top)) { 00219 return NULL; 00220 } 00221 if (fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char)) { 00222 return NULL; 00223 } 00224 if (fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char)) { 00225 return NULL; 00226 } 00227 if (fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top)) { 00228 return NULL; 00229 } 00230 if (fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom)) { 00231 return NULL; 00232 } 00233 if (fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) != 00234 sizeof(norm_aspect_ratio)) { 00235 return NULL; 00236 } 00237 // create the object 00238 CharSamp *char_samp = new CharSamp(); 00239 if (char_samp == NULL) { 00240 return NULL; 00241 } 00242 // init 00243 char_samp->label32_ = label32; 00244 char_samp->page_ = page; 00245 char_samp->left_ = left; 00246 char_samp->top_ = top; 00247 char_samp->first_char_ = first_char; 00248 char_samp->last_char_ = last_char; 00249 char_samp->norm_top_ = norm_top; 00250 char_samp->norm_bottom_ = norm_bottom; 00251 char_samp->norm_aspect_ratio_ = norm_aspect_ratio; 00252 // load the Bmp8 part 00253 if (char_samp->LoadFromCharDumpFile(fp) == false) { 00254 return NULL; 00255 } 00256 return char_samp; 00257 } 00258 00259 // returns a copy of the charsamp that is scaled to the 00260 // specified width and height 00261 CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) { 00262 CharSamp *scaled_samp = new CharSamp(wid, hgt); 00263 if (scaled_samp == NULL) { 00264 return NULL; 00265 } 00266 if (scaled_samp->ScaleFrom(this, isotropic) == false) { 00267 delete scaled_samp; 00268 return NULL; 00269 } 00270 scaled_samp->left_ = left_; 00271 scaled_samp->top_ = top_; 00272 scaled_samp->page_ = page_; 00273 scaled_samp->SetLabel(label32_); 00274 scaled_samp->first_char_ = first_char_; 00275 scaled_samp->last_char_ = last_char_; 00276 scaled_samp->norm_top_ = norm_top_; 00277 scaled_samp->norm_bottom_ = norm_bottom_; 00278 scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_; 00279 return scaled_samp; 00280 } 00281 00282 // Load a Char Samp from a dump file 00283 CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt, 00284 unsigned char *data) { 00285 // create the object 00286 CharSamp *char_samp = new CharSamp(left, top, wid, hgt); 00287 if (char_samp == NULL) { 00288 return NULL; 00289 } 00290 if (char_samp->LoadFromRawData(data) == false) { 00291 delete char_samp; 00292 return NULL; 00293 } 00294 return char_samp; 00295 } 00296 00297 // Saves the charsamp to a dump file 00298 bool CharSamp::Save2CharDumpFile(FILE *fp) const { 00299 unsigned int val32; 00300 // write and check 32 bit marker 00301 val32 = 0xabd0fefe; 00302 if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00303 return false; 00304 } 00305 // write label length 00306 val32 = (label32_ == NULL) ? 0 : LabelLen(label32_); 00307 if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) { 00308 return false; 00309 } 00310 // write label 00311 if (label32_ != NULL) { 00312 if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) != 00313 (val32 * sizeof(*label32_))) { 00314 return false; 00315 } 00316 } 00317 // write coordinates 00318 if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) { 00319 return false; 00320 } 00321 if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) { 00322 return false; 00323 } 00324 if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) { 00325 return false; 00326 } 00327 if (fwrite(&first_char_, 1, sizeof(first_char_), fp) != 00328 sizeof(first_char_)) { 00329 return false; 00330 } 00331 if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) { 00332 return false; 00333 } 00334 if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) { 00335 return false; 00336 } 00337 if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) != 00338 sizeof(norm_bottom_)) { 00339 return false; 00340 } 00341 if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) != 00342 sizeof(norm_aspect_ratio_)) { 00343 return false; 00344 } 00345 if (SaveBmp2CharDumpFile(fp) == false) { 00346 return false; 00347 } 00348 return true; 00349 } 00350 00351 // Crop the char samp such that there are no white spaces on any side. 00352 // The norm_top_ and norm_bottom_ fields are the character top/bottom 00353 // with respect to whatever context the character is being recognized 00354 // in (e.g. word bounding box) normalized to a standard size of 00355 // 255. Here they default to 0 and 255 (word box boundaries), but 00356 // since they are context dependent, they may need to be reset by the 00357 // calling function. 00358 CharSamp *CharSamp::Crop() { 00359 // get the dimesions of the cropped img 00360 int cropped_left = 0; 00361 int cropped_top = 0; 00362 int cropped_wid = wid_; 00363 int cropped_hgt = hgt_; 00364 Bmp8::Crop(&cropped_left, &cropped_top, 00365 &cropped_wid, &cropped_hgt); 00366 00367 if (cropped_wid == 0 || cropped_hgt == 0) { 00368 return NULL; 00369 } 00370 // create the cropped char samp 00371 CharSamp *cropped_samp = new CharSamp(left_ + cropped_left, 00372 top_ + cropped_top, 00373 cropped_wid, cropped_hgt); 00374 cropped_samp->SetLabel(label32_); 00375 cropped_samp->SetFirstChar(first_char_); 00376 cropped_samp->SetLastChar(last_char_); 00377 // the following 3 fields may/should be reset by the calling function 00378 // using context information, i.e., location of character box 00379 // w.r.t. the word bounding box 00380 cropped_samp->SetNormAspectRatio(255 * 00381 cropped_wid / (cropped_wid + cropped_hgt)); 00382 cropped_samp->SetNormTop(0); 00383 cropped_samp->SetNormBottom(255); 00384 00385 // copy the bitmap to the cropped img 00386 Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp); 00387 return cropped_samp; 00388 } 00389 00390 // segment the char samp to connected components 00391 // based on contiguity and vertical pixel density histogram 00392 ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left, 00393 int max_hist_wnd, int min_con_comp_size) const { 00394 // init 00395 (*segment_cnt) = 0; 00396 int concomp_cnt = 0; 00397 int seg_cnt = 0; 00398 // find the concomps of the image 00399 ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size); 00400 if (concomp_cnt <= 0 || !concomp_array) { 00401 if (concomp_array) 00402 delete []concomp_array; 00403 return NULL; 00404 } 00405 ConComp **seg_array = NULL; 00406 // segment each concomp further using vertical histogram 00407 for (int concomp = 0; concomp < concomp_cnt; concomp++) { 00408 int concomp_seg_cnt = 0; 00409 // segment the concomp 00410 ConComp **concomp_seg_array = NULL; 00411 ConComp **concomp_alloc_seg = 00412 concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt); 00413 // no segments, add the whole concomp 00414 if (concomp_alloc_seg == NULL) { 00415 concomp_seg_cnt = 1; 00416 concomp_seg_array = concomp_array + concomp; 00417 } else { 00418 // delete the original concomp, we no longer need it 00419 concomp_seg_array = concomp_alloc_seg; 00420 delete concomp_array[concomp]; 00421 } 00422 // add the resulting segments 00423 for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) { 00424 // too small of a segment: ignore 00425 if (concomp_seg_array[seg_idx]->Width() < 2 && 00426 concomp_seg_array[seg_idx]->Height() < 2) { 00427 delete concomp_seg_array[seg_idx]; 00428 } else { 00429 // add the new segment 00430 // extend the segment array 00431 if ((seg_cnt % kConCompAllocChunk) == 0) { 00432 ConComp **temp_segm_array = 00433 new ConComp *[seg_cnt + kConCompAllocChunk]; 00434 if (temp_segm_array == NULL) { 00435 fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not " 00436 "allocate additional connected components\n"); 00437 delete []concomp_seg_array; 00438 delete []concomp_array; 00439 delete []seg_array; 00440 return NULL; 00441 } 00442 if (seg_cnt > 0) { 00443 memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array)); 00444 delete []seg_array; 00445 } 00446 seg_array = temp_segm_array; 00447 } 00448 seg_array[seg_cnt++] = concomp_seg_array[seg_idx]; 00449 } 00450 } // segment 00451 if (concomp_alloc_seg != NULL) { 00452 delete []concomp_alloc_seg; 00453 } 00454 } // concomp 00455 delete []concomp_array; 00456 00457 // sort the concomps from Left2Right or Right2Left, based on the reading order 00458 if (seg_cnt > 0 && seg_array != NULL) { 00459 qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ? 00460 ConComp::Right2LeftComparer : ConComp::Left2RightComparer); 00461 } 00462 (*segment_cnt) = seg_cnt; 00463 return seg_array; 00464 } 00465 00466 // builds a char samp from a set of connected components 00467 CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp, 00468 int seg_flags_size, int *seg_flags, 00469 bool *left_most, bool *right_most, 00470 int word_hgt) { 00471 int concomp; 00472 int end_concomp; 00473 int concomp_cnt = 0; 00474 end_concomp = strt_concomp + seg_flags_size; 00475 // determine ID range 00476 bool once = false; 00477 int min_id = -1; 00478 int max_id = -1; 00479 for (concomp = strt_concomp; concomp < end_concomp; concomp++) { 00480 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { 00481 if (!once) { 00482 min_id = concomp_array[concomp]->ID(); 00483 max_id = concomp_array[concomp]->ID(); 00484 once = true; 00485 } else { 00486 UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id); 00487 } 00488 concomp_cnt++; 00489 } 00490 } 00491 if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) { 00492 return NULL; 00493 } 00494 // alloc memo for computing leftmost and right most attributes 00495 int id_cnt = max_id - min_id + 1; 00496 bool *id_exist = new bool[id_cnt]; 00497 bool *left_most_exist = new bool[id_cnt]; 00498 bool *right_most_exist = new bool[id_cnt]; 00499 if (!id_exist || !left_most_exist || !right_most_exist) 00500 return NULL; 00501 memset(id_exist, 0, id_cnt * sizeof(*id_exist)); 00502 memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist)); 00503 memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist)); 00504 // find the dimensions of the charsamp 00505 once = false; 00506 int left = -1; 00507 int right = -1; 00508 int top = -1; 00509 int bottom = -1; 00510 int unq_ids = 0; 00511 int unq_left_most = 0; 00512 int unq_right_most = 0; 00513 for (concomp = strt_concomp; concomp < end_concomp; concomp++) { 00514 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { 00515 if (!once) { 00516 left = concomp_array[concomp]->Left(); 00517 right = concomp_array[concomp]->Right(); 00518 top = concomp_array[concomp]->Top(); 00519 bottom = concomp_array[concomp]->Bottom(); 00520 once = true; 00521 } else { 00522 UpdateRange(concomp_array[concomp]->Left(), 00523 concomp_array[concomp]->Right(), &left, &right); 00524 UpdateRange(concomp_array[concomp]->Top(), 00525 concomp_array[concomp]->Bottom(), &top, &bottom); 00526 } 00527 // count unq ids, unq left most and right mosts ids 00528 int concomp_id = concomp_array[concomp]->ID() - min_id; 00529 if (!id_exist[concomp_id]) { 00530 id_exist[concomp_id] = true; 00531 unq_ids++; 00532 } 00533 if (concomp_array[concomp]->LeftMost()) { 00534 if (left_most_exist[concomp_id] == false) { 00535 left_most_exist[concomp_id] = true; 00536 unq_left_most++; 00537 } 00538 } 00539 if (concomp_array[concomp]->RightMost()) { 00540 if (right_most_exist[concomp_id] == false) { 00541 right_most_exist[concomp_id] = true; 00542 unq_right_most++; 00543 } 00544 } 00545 } 00546 } 00547 delete []id_exist; 00548 delete []left_most_exist; 00549 delete []right_most_exist; 00550 if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) { 00551 return NULL; 00552 } 00553 (*left_most) = (unq_left_most >= unq_ids); 00554 (*right_most) = (unq_right_most >= unq_ids); 00555 // create the char sample object 00556 CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1); 00557 if (!samp) { 00558 return NULL; 00559 } 00560 00561 // set the foreground pixels 00562 for (concomp = strt_concomp; concomp < end_concomp; concomp++) { 00563 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) { 00564 ConCompPt *pt_ptr = concomp_array[concomp]->Head(); 00565 while (pt_ptr) { 00566 samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0; 00567 pt_ptr = pt_ptr->Next(); 00568 } 00569 } 00570 } 00571 return samp; 00572 } 00573 00574 // clones the object 00575 CharSamp *CharSamp::Clone() const { 00576 // create the cropped char samp 00577 CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_); 00578 samp->SetLabel(label32_); 00579 samp->SetFirstChar(first_char_); 00580 samp->SetLastChar(last_char_); 00581 samp->SetNormTop(norm_top_); 00582 samp->SetNormBottom(norm_bottom_); 00583 samp->SetNormAspectRatio(norm_aspect_ratio_); 00584 // copy the bitmap to the cropped img 00585 Copy(0, 0, wid_, hgt_, samp); 00586 return samp; 00587 } 00588 00589 // Load a Char Samp from a dump file 00590 CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) { 00591 unsigned int val32; 00592 char_32 *label32; 00593 unsigned char *raw_data = *raw_data_ptr; 00594 00595 // read and check 32 bit marker 00596 memcpy(&val32, raw_data, sizeof(val32)); 00597 raw_data += sizeof(val32); 00598 if (val32 != 0xabd0fefe) { 00599 return NULL; 00600 } 00601 // read label length, 00602 memcpy(&val32, raw_data, sizeof(val32)); 00603 raw_data += sizeof(val32); 00604 // the label is not null terminated in the file 00605 if (val32 > 0) { 00606 label32 = new char_32[val32 + 1]; 00607 if (label32 == NULL) { 00608 return NULL; 00609 } 00610 // read label 00611 memcpy(label32, raw_data, val32 * sizeof(*label32)); 00612 raw_data += (val32 * sizeof(*label32)); 00613 // null terminate 00614 label32[val32] = 0; 00615 } else { 00616 label32 = NULL; 00617 } 00618 00619 // create the object 00620 CharSamp *char_samp = new CharSamp(); 00621 if (char_samp == NULL) { 00622 return NULL; 00623 } 00624 00625 // read coordinates 00626 char_samp->label32_ = label32; 00627 memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_)); 00628 raw_data += sizeof(char_samp->page_); 00629 memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_)); 00630 raw_data += sizeof(char_samp->left_); 00631 memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_)); 00632 raw_data += sizeof(char_samp->top_); 00633 memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_)); 00634 raw_data += sizeof(char_samp->first_char_); 00635 memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_)); 00636 raw_data += sizeof(char_samp->last_char_); 00637 memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_)); 00638 raw_data += sizeof(char_samp->norm_top_); 00639 memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_)); 00640 raw_data += sizeof(char_samp->norm_bottom_); 00641 memcpy(&char_samp->norm_aspect_ratio_, raw_data, 00642 sizeof(char_samp->norm_aspect_ratio_)); 00643 raw_data += sizeof(char_samp->norm_aspect_ratio_); 00644 00645 // load the Bmp8 part 00646 if (char_samp->LoadFromCharDumpFile(&raw_data) == false) { 00647 delete char_samp; 00648 return NULL; 00649 } 00650 00651 (*raw_data_ptr) = raw_data; 00652 return char_samp; 00653 } 00654 00655 // computes the features corresponding to the char sample 00656 bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) { 00657 // Create a scaled BMP 00658 CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size); 00659 if (!scaled_bmp) { 00660 return false; 00661 } 00662 // prepare input 00663 unsigned char *buff = scaled_bmp->RawData(); 00664 // bitmap features 00665 int input; 00666 int bmp_size = conv_grid_size * conv_grid_size; 00667 for (input = 0; input < bmp_size; input++) { 00668 features[input] = 255.0f - (1.0f * buff[input]); 00669 } 00670 // word context features 00671 features[input++] = FirstChar(); 00672 features[input++] = LastChar(); 00673 features[input++] = NormTop(); 00674 features[input++] = NormBottom(); 00675 features[input++] = NormAspectRatio(); 00676 delete scaled_bmp; 00677 return true; 00678 } 00679 } // namespace tesseract