Tesseract  3.02
tesseract-ocr/cube/char_samp_set.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        char_samp_set.cpp
00003  * Description: Implementation of a Character Sample Set Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <new>
#include <string>
#include "char_samp_set.h"
#include "cached_file.h"
00024 
00025 namespace tesseract {
00026 
00027 CharSampSet::CharSampSet() {
00028   cnt_ = 0;
00029   samp_buff_ = NULL;
00030   own_samples_ = false;
00031 }
00032 
00033 CharSampSet::~CharSampSet() {
00034   Cleanup();
00035 }
00036 
00037 // free buffers and init vars
00038 void CharSampSet::Cleanup() {
00039   if (samp_buff_ != NULL) {
00040     // only free samples if owned by class
00041     if (own_samples_ == true) {
00042       for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) {
00043         if (samp_buff_[samp_idx] != NULL) {
00044           delete samp_buff_[samp_idx];
00045         }
00046       }
00047     }
00048     delete []samp_buff_;
00049   }
00050   cnt_ = 0;
00051   samp_buff_ = NULL;
00052 }
00053 
00054 // add a new sample
00055 bool CharSampSet::Add(CharSamp *char_samp) {
00056   if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
00057       // create an extended buffer
00058     CharSamp **new_samp_buff =
00059         reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]);
00060     if (new_samp_buff == NULL) {
00061       return false;
00062     }
00063     // copy old contents
00064     if (cnt_ > 0) {
00065       memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
00066       delete []samp_buff_;
00067     }
00068     samp_buff_ = new_samp_buff;
00069   }
00070   samp_buff_[cnt_++] = char_samp;
00071   return true;
00072 }
00073 
00074 // load char samples from file
00075 bool CharSampSet::LoadCharSamples(FILE *fp) {
00076   // free existing
00077   Cleanup();
00078   // samples are created here and owned by the class
00079   own_samples_ = true;
00080   // start loading char samples
00081   while (feof(fp) == 0) {
00082     CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
00083     if (new_samp != NULL) {
00084       if (Add(new_samp) == false) {
00085         return false;
00086       }
00087     }
00088   }
00089   return true;
00090 }
00091 
00092 // creates a CharSampSet object from file
00093 CharSampSet * CharSampSet::FromCharDumpFile(string file_name) {
00094   FILE *fp;
00095   unsigned int val32;
00096   // open the file
00097   fp = fopen(file_name.c_str(), "rb");
00098   if (fp == NULL) {
00099     return NULL;
00100   }
00101   // read and verify marker
00102   if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00103     return NULL;
00104   }
00105   if (val32 != 0xfefeabd0) {
00106     return NULL;
00107   }
00108   // create an object
00109   CharSampSet *samp_set = new CharSampSet();
00110   if (samp_set == NULL) {
00111     return NULL;
00112   }
00113   if (samp_set->LoadCharSamples(fp) == false) {
00114     delete samp_set;
00115     samp_set = NULL;
00116   }
00117   fclose(fp);
00118   return samp_set;
00119 }
00120 
00121 // Create a new Char Dump file
00122 FILE *CharSampSet::CreateCharDumpFile(string file_name) {
00123   FILE *fp;
00124   unsigned int val32;
00125   // create the file
00126   fp =  fopen(file_name.c_str(), "wb");
00127   if (!fp) {
00128     return NULL;
00129   }
00130   // read and verify marker
00131   val32 = 0xfefeabd0;
00132   if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00133     return NULL;
00134   }
00135   return fp;
00136 }
00137 
00138 // Enumerate the Samples in the set one-by-one calling the enumertor's
00139   // EnumCharSamp method for each sample
00140 bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
00141   CachedFile *fp_in;
00142   unsigned int val32;
00143   long i64_size,
00144     i64_pos;
00145   // open the file
00146   fp_in = new CachedFile(file_name);
00147   if (fp_in == NULL) {
00148     return false;
00149   }
00150   i64_size = fp_in->Size();
00151   if (i64_size < 1) {
00152     return false;
00153   }
00154   // read and verify marker
00155   if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) {
00156     return false;
00157   }
00158   if (val32 != 0xfefeabd0) {
00159     return false;
00160   }
00161   // start loading char samples
00162   while (fp_in->eof() == false) {
00163     CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
00164     i64_pos = fp_in->Tell();
00165     if (new_samp != NULL) {
00166       bool ret_flag = (enum_obj)->EnumCharSamp(new_samp,
00167                                                (100.0f * i64_pos / i64_size));
00168       delete new_samp;
00169       if (ret_flag == false) {
00170         break;
00171       }
00172     }
00173   }
00174   delete fp_in;
00175   return true;
00176 }
00177 
00178 }  // namespace tesseract