Tesseract  3.02
tesseract-ocr/cube/cached_file.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        cached_file.cpp
00003  * Description: Implementation of a Cached File Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <string>
00021 #include <stdlib.h>
00022 #include <cstring>
00023 #include "cached_file.h"
00024 
00025 namespace tesseract {
00026 
00027 CachedFile::CachedFile(string file_name) {
00028   file_name_ = file_name;
00029   buff_ = NULL;
00030   buff_pos_ = 0;
00031   buff_size_ = 0;
00032   file_pos_ = 0;
00033   file_size_ = 0;
00034   fp_ = NULL;
00035 }
00036 
00037 CachedFile::~CachedFile() {
00038   if (fp_ != NULL) {
00039     fclose(fp_);
00040     fp_ = NULL;
00041   }
00042 
00043   if (buff_ != NULL) {
00044     delete []buff_;
00045     buff_ = NULL;
00046   }
00047 }
00048 
00049 // free buffers and init vars
00050 bool CachedFile::Open() {
00051   if (fp_ != NULL) {
00052     return true;
00053   }
00054 
00055   fp_ = fopen(file_name_.c_str(), "rb");
00056   if (fp_ == NULL) {
00057     return false;
00058   }
00059 
00060   // seek to the end
00061   fseek(fp_, 0, SEEK_END);
00062   // get file size
00063   file_size_ = ftell(fp_);
00064   if (file_size_ < 1) {
00065     return false;
00066   }
00067   // rewind again
00068   rewind(fp_);
00069   // alloc memory for buffer
00070   buff_ = new unsigned char[kCacheSize];
00071   if (buff_ == NULL) {
00072     return false;
00073   }
00074   // init counters
00075   buff_size_ = 0;
00076   buff_pos_ = 0;
00077   file_pos_ = 0;
00078   return true;
00079 }
00080 
00081 // add a new sample
00082 int CachedFile::Read(void *read_buff, int bytes) {
00083   int read_bytes = 0;
00084   unsigned char *buff = (unsigned char *)read_buff;
00085 
00086   // do we need to read beyond the buffer
00087   if ((buff_pos_ + bytes) > buff_size_) {
00088     // copy as much bytes from the current buffer if any
00089     int copy_bytes = buff_size_ - buff_pos_;
00090 
00091     if (copy_bytes > 0) {
00092       memcpy(buff, buff_ + buff_pos_, copy_bytes);
00093       buff += copy_bytes;
00094       bytes -= copy_bytes;
00095       read_bytes += copy_bytes;
00096     }
00097 
00098     // determine how much to read
00099     buff_size_ = kCacheSize;
00100 
00101     if ((file_pos_ + buff_size_) > file_size_) {
00102       buff_size_ = static_cast<int>(file_size_ - file_pos_);
00103     }
00104 
00105     // EOF ?
00106     if (buff_size_ <= 0 || bytes > buff_size_) {
00107       return read_bytes;
00108     }
00109 
00110     // read the first chunck
00111     if (fread(buff_, 1, buff_size_, fp_) != buff_size_) {
00112       return read_bytes;
00113     }
00114 
00115     buff_pos_ = 0;
00116     file_pos_ += buff_size_;
00117   }
00118 
00119   memcpy(buff, buff_ + buff_pos_, bytes);
00120   read_bytes += bytes;
00121   buff_pos_ += bytes;
00122 
00123   return read_bytes;
00124 }
00125 
00126 long CachedFile::Size() {
00127   if (fp_ == NULL && Open() == false) {
00128     return 0;
00129   }
00130 
00131   return file_size_;
00132 }
00133 
00134 long CachedFile::Tell() {
00135   if (fp_ == NULL && Open() == false) {
00136     return 0;
00137   }
00138 
00139   return file_pos_ - buff_size_ + buff_pos_;
00140 }
00141 
00142 bool CachedFile::eof() {
00143   if (fp_ == NULL && Open() == false) {
00144     return true;
00145   }
00146 
00147   return (file_pos_ - buff_size_ + buff_pos_) >= file_size_;
00148 }
00149 
00150 }  // namespace tesseract