Tesseract  3.02
tesseract-ocr/cube/bmp_8.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        bmp_8.cpp
00003  * Description: Implementation of an 8-bit Bitmap class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include <stdlib.h>
00021 #include <math.h>
00022 #include <cstring>
00023 #include <algorithm>
00024 #include "bmp_8.h"
00025 #include "con_comp.h"
00026 #ifdef USE_STD_NAMESPACE
00027 using std::min;
00028 using std::max;
00029 #endif
00030 
00031 #ifdef _WIN32
00032 #ifndef M_PI
00033 #define M_PI 3.14159265358979323846
00034 #endif
00035 #endif
00036 
00037 namespace tesseract {
00038 
00039 const int Bmp8::kDeslantAngleCount = (1 + static_cast<int>(0.5f +
00040     (kMaxDeslantAngle - kMinDeslantAngle) / kDeslantAngleDelta));
00041 float *Bmp8::tan_table_ = NULL;
00042 
00043 Bmp8::Bmp8(unsigned short wid, unsigned short hgt)
00044     : wid_(wid)
00045     , hgt_(hgt) {
00046   line_buff_ = CreateBmpBuffer();
00047 }
00048 
00049 Bmp8::~Bmp8() {
00050   FreeBmpBuffer(line_buff_);
00051 }
00052 
00053 // free buffer
00054 void Bmp8::FreeBmpBuffer(unsigned char **buff) {
00055   if (buff != NULL) {
00056     if (buff[0] != NULL) {
00057       delete []buff[0];
00058     }
00059     delete []buff;
00060   }
00061 }
00062 
00063 void Bmp8::FreeBmpBuffer(unsigned int **buff) {
00064   if (buff != NULL) {
00065     if (buff[0] != NULL) {
00066       delete []buff[0];
00067     }
00068     delete []buff;
00069   }
00070 }
00071 
00072 // init bmp buffers
00073 unsigned char **Bmp8::CreateBmpBuffer(unsigned char init_val) {
00074   unsigned char **buff;
00075 
00076   // Check valid sizes
00077   if (!hgt_ || !wid_)
00078     return NULL;
00079 
00080   // compute stride (align on 4 byte boundries)
00081   stride_ = ((wid_ % 4) == 0) ? wid_ : (4 * (1 + (wid_ / 4)));
00082 
00083   buff = (unsigned char **) new unsigned char *[hgt_ * sizeof(*buff)];
00084   if (!buff) {
00085     delete []buff;
00086     return NULL;
00087   }
00088 
00089   // alloc and init memory for buffer and line buffer
00090   buff[0] = (unsigned char *)
00091       new unsigned char[stride_ * hgt_ * sizeof(*buff[0])];
00092   if (!buff[0]) {
00093     return NULL;
00094   }
00095 
00096   memset(buff[0], init_val, stride_ * hgt_ * sizeof(*buff[0]));
00097 
00098   for (int y = 1; y < hgt_; y++) {
00099     buff[y] = buff[y -1] + stride_;
00100   }
00101 
00102   return buff;
00103 }
00104 
00105 // init bmp buffers
00106 unsigned int ** Bmp8::CreateBmpBuffer(int wid, int hgt,
00107                                       unsigned char init_val) {
00108   unsigned int **buff;
00109 
00110   // compute stride (align on 4 byte boundries)
00111   buff = (unsigned int **) new unsigned int *[hgt * sizeof(*buff)];
00112   if (!buff) {
00113     delete []buff;
00114     return NULL;
00115   }
00116 
00117   // alloc and init memory for buffer and line buffer
00118   buff[0] = (unsigned int *) new unsigned int[wid * hgt * sizeof(*buff[0])];
00119   if (!buff[0]) {
00120     return NULL;
00121   }
00122 
00123   memset(buff[0], init_val, wid * hgt * sizeof(*buff[0]));
00124 
00125   for (int y = 1; y < hgt; y++) {
00126     buff[y] = buff[y -1] + wid;
00127   }
00128 
00129   return buff;
00130 }
00131 
00132 // clears the contents of the bmp
00133 bool Bmp8::Clear() {
00134   if (line_buff_ == NULL) {
00135     return false;
00136   }
00137 
00138   memset(line_buff_[0], 0xff, stride_ * hgt_ * sizeof(*line_buff_[0]));
00139   return true;
00140 }
00141 
00142 bool Bmp8::LoadFromCharDumpFile(CachedFile *fp) {
00143   unsigned short wid;
00144   unsigned short hgt;
00145   unsigned short x;
00146   unsigned short y;
00147   int buf_size;
00148   int pix;
00149   int pix_cnt;
00150   unsigned int val32;
00151   unsigned char *buff;
00152 
00153   // read and check 32 bit marker
00154   if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
00155     return false;
00156   }
00157 
00158   if (val32 != kMagicNumber) {
00159     return false;
00160   }
00161 
00162   // read wid and hgt
00163   if (fp->Read(&wid, sizeof(wid)) != sizeof(wid)) {
00164     return false;
00165   }
00166 
00167   if (fp->Read(&hgt, sizeof(hgt)) != sizeof(hgt)) {
00168     return false;
00169   }
00170 
00171   // read buf size
00172   if (fp->Read(&buf_size, sizeof(buf_size)) != sizeof(buf_size)) {
00173     return false;
00174   }
00175 
00176   // validate buf size: for now, only 3 channel (RBG) is supported
00177   pix_cnt = wid * hgt;
00178   if (buf_size != (3 * pix_cnt)) {
00179     return false;
00180   }
00181 
00182   // alloc memory & read the 3 channel buffer
00183   buff = new unsigned char[buf_size];
00184   if (buff == NULL) {
00185     return false;
00186   }
00187 
00188   if (fp->Read(buff, buf_size) != buf_size) {
00189     delete []buff;
00190     return false;
00191   }
00192 
00193   // create internal buffers
00194   wid_ = wid;
00195   hgt_ = hgt;
00196 
00197   line_buff_ = CreateBmpBuffer();
00198   if (line_buff_ == NULL) {
00199     delete []buff;
00200     return false;
00201   }
00202 
00203   // copy the data
00204   for (y = 0, pix = 0; y < hgt_; y++) {
00205     for (x = 0; x < wid_; x++, pix += 3) {
00206       // for now we only support gray scale,
00207       // so we expect R = G = B, it this is not the case, bail out
00208       if  (buff[pix] != buff[pix + 1] || buff[pix] != buff[pix + 2]) {
00209         delete []buff;
00210         return false;
00211       }
00212       line_buff_[y][x] = buff[pix];
00213     }
00214   }
00215 
00216   // delete temp buffer
00217   delete[]buff;
00218 
00219   return true;
00220 }
00221 
00222 Bmp8 * Bmp8::FromCharDumpFile(CachedFile *fp) {
00223   // create a Bmp8 object
00224   Bmp8 *bmp_obj = new Bmp8(0, 0);
00225   if (bmp_obj == NULL) {
00226     return NULL;
00227   }
00228 
00229   if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
00230     delete bmp_obj;
00231   }
00232 
00233   return bmp_obj;
00234 }
00235 
00236 bool Bmp8::LoadFromCharDumpFile(FILE *fp) {
00237   unsigned short wid;
00238   unsigned short hgt;
00239   unsigned short x;
00240   unsigned short y;
00241   int buf_size;
00242   int pix;
00243   int pix_cnt;
00244   unsigned int val32;
00245   unsigned char *buff;
00246 
00247   // read and check 32 bit marker
00248   if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00249     return false;
00250   }
00251 
00252   if (val32 != kMagicNumber) {
00253     return false;
00254   }
00255 
00256   // read wid and hgt
00257   if (fread(&wid, 1, sizeof(wid), fp) != sizeof(wid)) {
00258     return false;
00259   }
00260 
00261   if (fread(&hgt, 1, sizeof(hgt), fp) != sizeof(hgt)) {
00262     return false;
00263   }
00264 
00265   // read buf size
00266   if (fread(&buf_size, 1, sizeof(buf_size), fp) != sizeof(buf_size)) {
00267     return false;
00268   }
00269 
00270   // validate buf size: for now, only 3 channel (RBG) is supported
00271   pix_cnt = wid * hgt;
00272   if (buf_size != (3 * pix_cnt)) {
00273     return false;
00274   }
00275 
00276   // alloc memory & read the 3 channel buffer
00277   buff = new unsigned char[buf_size];
00278   if (buff == NULL) {
00279     return false;
00280   }
00281 
00282   if (fread(buff, 1, buf_size, fp) != buf_size) {
00283     delete []buff;
00284     return false;
00285   }
00286 
00287   // create internal buffers
00288   wid_ = wid;
00289   hgt_ = hgt;
00290 
00291   line_buff_ = CreateBmpBuffer();
00292   if (line_buff_ == NULL) {
00293     delete []buff;
00294     return false;
00295   }
00296 
00297   // copy the data
00298   for (y = 0, pix = 0; y < hgt_; y++) {
00299     for (x = 0; x < wid_; x++, pix += 3) {
00300       // for now we only support gray scale,
00301       // so we expect R = G = B, it this is not the case, bail out
00302       if  ( buff[pix] != buff[pix + 1] ||
00303             buff[pix] != buff[pix + 2]
00304           ) {
00305         delete []buff;
00306         return false;
00307           }
00308 
00309           line_buff_[y][x] = buff[pix];
00310     }
00311   }
00312 
00313   // delete temp buffer
00314   delete[]buff;
00315 
00316   return true;
00317 }
00318 
00319 Bmp8 * Bmp8::FromCharDumpFile(FILE *fp) {
00320   // create a Bmp8 object
00321   Bmp8 *bmp_obj = new Bmp8(0, 0);
00322   if (bmp_obj == NULL) {
00323     return NULL;
00324   }
00325 
00326   if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
00327     delete bmp_obj;
00328   }
00329 
00330   return bmp_obj;
00331 }
00332 
00333 bool Bmp8::IsBlankColumn(int x) const {
00334   for (int y = 0; y < hgt_; y++) {
00335     if (line_buff_[y][x] != 0xff) {
00336       return false;
00337     }
00338   }
00339 
00340   return true;
00341 }
00342 
00343 bool Bmp8::IsBlankRow(int y) const {
00344   for (int x = 0; x < wid_; x++) {
00345     if (line_buff_[y][x] != 0xff) {
00346       return false;
00347     }
00348   }
00349 
00350   return true;
00351 }
00352 
00353 // crop the bitmap returning new dimensions
00354 void Bmp8::Crop(int *xst, int *yst, int *wid, int *hgt) {
00355   (*xst) = 0;
00356   (*yst) = 0;
00357 
00358   int xend = wid_ - 1;
00359   int yend = hgt_ - 1;
00360 
00361   while ((*xst) < (wid_ - 1) && (*xst) <= xend) {
00362     // column is not empty
00363     if (!IsBlankColumn((*xst))) {
00364       break;
00365     }
00366     (*xst)++;
00367   }
00368 
00369   while (xend > 0 && xend >= (*xst)) {
00370     // column is not empty
00371     if (!IsBlankColumn(xend)) {
00372       break;
00373     }
00374     xend--;
00375   }
00376 
00377   while ((*yst) < (hgt_ - 1) && (*yst) <= yend) {
00378     // column is not empty
00379     if (!IsBlankRow((*yst))) {
00380       break;
00381     }
00382     (*yst)++;
00383   }
00384 
00385   while (yend > 0 && yend >= (*yst)) {
00386     // column is not empty
00387     if (!IsBlankRow(yend)) {
00388       break;
00389     }
00390     yend--;
00391   }
00392 
00393   (*wid) = xend - (*xst) + 1;
00394   (*hgt) = yend - (*yst) + 1;
00395 }
00396 
00397 // generates a scaled bitmap with dimensions the new bmp will have the
00398 // same aspect ratio and will be centered in the box
00399 bool Bmp8::ScaleFrom(Bmp8 *bmp, bool isotropic) {
00400   int x_num;
00401   int x_denom;
00402   int y_num;
00403   int y_denom;
00404   int xoff;
00405   int yoff;
00406   int xsrc;
00407   int ysrc;
00408   int xdest;
00409   int ydest;
00410   int xst_src = 0;
00411   int yst_src = 0;
00412   int xend_src = bmp->wid_ - 1;
00413   int yend_src = bmp->hgt_ - 1;
00414   int wid_src;
00415   int hgt_src;
00416 
00417   // src dimensions
00418   wid_src = xend_src - xst_src + 1,
00419   hgt_src = yend_src - yst_src + 1;
00420 
00421   // scale to maintain aspect ratio if required
00422   if (isotropic) {
00423     if ((wid_ * hgt_src) > (hgt_ * wid_src)) {
00424       x_num = y_num = hgt_;
00425       x_denom = y_denom = hgt_src;
00426     } else {
00427       x_num = y_num = wid_;
00428       x_denom = y_denom = wid_src;
00429     }
00430   } else {
00431     x_num = wid_;
00432     y_num = hgt_;
00433     x_denom = wid_src;
00434     y_denom = hgt_src;
00435   }
00436 
00437   // compute offsets needed to center new bmp
00438   xoff = (wid_ - ((x_num * wid_src) / x_denom)) / 2;
00439   yoff = (hgt_ - ((y_num * hgt_src) / y_denom)) / 2;
00440 
00441   // scale up
00442   if (y_num > y_denom) {
00443     for (ydest = yoff; ydest < (hgt_ - yoff); ydest++) {
00444       // compute un-scaled y
00445       ysrc = static_cast<int>(0.5 + (1.0 * (ydest - yoff) *
00446           y_denom / y_num));
00447       if (ysrc < 0 || ysrc >= hgt_src) {
00448         continue;
00449       }
00450 
00451       for (xdest = xoff; xdest < (wid_ - xoff); xdest++) {
00452         // compute un-scaled y
00453         xsrc = static_cast<int>(0.5 + (1.0 * (xdest - xoff) *
00454             x_denom / x_num));
00455         if (xsrc < 0 || xsrc >= wid_src) {
00456           continue;
00457         }
00458 
00459         line_buff_[ydest][xdest] =
00460             bmp->line_buff_[ysrc + yst_src][xsrc + xst_src];
00461       }
00462     }
00463   } else {
00464     // or scale down
00465     // scaling down is a bit tricky: we'll accumulate pixels
00466     // and then compute the means
00467     unsigned int **dest_line_buff = CreateBmpBuffer(wid_, hgt_, 0),
00468       **dest_pix_cnt =  CreateBmpBuffer(wid_, hgt_, 0);
00469 
00470     for (ysrc = 0; ysrc < hgt_src; ysrc++) {
00471       // compute scaled y
00472       ydest = yoff + static_cast<int>(0.5 + (1.0 * ysrc * y_num / y_denom));
00473       if (ydest < 0 || ydest >= hgt_) {
00474         continue;
00475       }
00476 
00477       for (xsrc = 0; xsrc < wid_src; xsrc++) {
00478         // compute scaled y
00479         xdest = xoff + static_cast<int>(0.5 + (1.0 * xsrc * x_num / x_denom));
00480         if (xdest < 0 || xdest >= wid_) {
00481           continue;
00482         }
00483 
00484         dest_line_buff[ydest][xdest] +=
00485             bmp->line_buff_[ysrc + yst_src][xsrc + xst_src];
00486         dest_pix_cnt[ydest][xdest]++;
00487       }
00488     }
00489 
00490     for (ydest = 0; ydest < hgt_; ydest++) {
00491       for (xdest = 0; xdest < wid_; xdest++) {
00492         if (dest_pix_cnt[ydest][xdest] > 0) {
00493           unsigned int pixval =
00494               dest_line_buff[ydest][xdest] / dest_pix_cnt[ydest][xdest];
00495 
00496           line_buff_[ydest][xdest] =
00497               (unsigned char) min((unsigned int)255, pixval);
00498         }
00499       }
00500     }
00501 
00502     // we no longer need these temp buffers
00503     FreeBmpBuffer(dest_line_buff);
00504     FreeBmpBuffer(dest_pix_cnt);
00505   }
00506 
00507   return true;
00508 }
00509 
00510 bool Bmp8::LoadFromRawData(unsigned char *data) {
00511   unsigned char *pline_data = data;
00512 
00513   // copy the data
00514   for (int y = 0; y < hgt_; y++, pline_data += wid_) {
00515     memcpy(line_buff_[y], pline_data, wid_ * sizeof(*pline_data));
00516   }
00517 
00518   return true;
00519 }
00520 
00521 bool Bmp8::SaveBmp2CharDumpFile(FILE *fp) const {
00522   unsigned short wid;
00523   unsigned short hgt;
00524   unsigned short x;
00525   unsigned short y;
00526   int buf_size;
00527   int pix;
00528   int pix_cnt;
00529   unsigned int val32;
00530   unsigned char *buff;
00531 
00532   // write and check 32 bit marker
00533   val32 = kMagicNumber;
00534   if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00535     return false;
00536   }
00537 
00538   // write wid and hgt
00539   wid = wid_;
00540   if (fwrite(&wid, 1, sizeof(wid), fp) != sizeof(wid)) {
00541     return false;
00542   }
00543 
00544   hgt = hgt_;
00545   if (fwrite(&hgt, 1, sizeof(hgt), fp) != sizeof(hgt)) {
00546     return false;
00547   }
00548 
00549   // write buf size
00550   pix_cnt = wid * hgt;
00551   buf_size = 3 * pix_cnt;
00552   if (fwrite(&buf_size, 1, sizeof(buf_size), fp) != sizeof(buf_size)) {
00553     return false;
00554   }
00555 
00556   // alloc memory & write the 3 channel buffer
00557   buff = new unsigned char[buf_size];
00558   if (buff == NULL) {
00559     return false;
00560   }
00561 
00562   // copy the data
00563   for (y = 0, pix = 0; y < hgt_; y++) {
00564     for (x = 0; x < wid_; x++, pix += 3) {
00565       buff[pix] =
00566       buff[pix + 1] =
00567       buff[pix + 2] = line_buff_[y][x];
00568     }
00569   }
00570 
00571   if (fwrite(buff, 1, buf_size, fp) != buf_size) {
00572     delete []buff;
00573     return false;
00574   }
00575 
00576   // delete temp buffer
00577   delete[]buff;
00578 
00579   return true;
00580 }
00581 
00582 // copy part of the specified bitmap to the top of the bitmap
00583 // does any necessary clipping
00584 void Bmp8::Copy(int x_st, int y_st, int wid, int hgt, Bmp8 *bmp_dest) const {
00585   int x_end = min(x_st + wid, static_cast<int>(wid_)),
00586   y_end = min(y_st + hgt, static_cast<int>(hgt_));
00587 
00588   for (int y = y_st; y < y_end; y++) {
00589     for (int x = x_st; x < x_end; x++) {
00590       bmp_dest->line_buff_[y - y_st][x - x_st] =
00591           line_buff_[y][x];
00592     }
00593   }
00594 }
00595 
00596 bool Bmp8::IsIdentical(Bmp8 *pBmp) const {
00597   if (wid_ != pBmp->wid_ || hgt_ != pBmp->hgt_) {
00598     return false;
00599   }
00600 
00601   for (int y = 0; y < hgt_; y++) {
00602     if (memcmp(line_buff_[y], pBmp->line_buff_[y], wid_) != 0) {
00603       return false;
00604     }
00605   }
00606 
00607   return true;
00608 }
00609 
00610 // Detect connected components in the bitmap
00611 ConComp ** Bmp8::FindConComps(int *concomp_cnt, int min_size) const {
00612   (*concomp_cnt) = 0;
00613 
00614   unsigned int **out_bmp_array = CreateBmpBuffer(wid_, hgt_, 0);
00615   if (out_bmp_array == NULL) {
00616     fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not allocate "
00617             "bitmap array\n");
00618     return NULL;
00619   }
00620 
00621   // listed of connected components
00622   ConComp **concomp_array = NULL;
00623 
00624   int x;
00625   int y;
00626   int x_nbr;
00627   int y_nbr;
00628   int concomp_id;
00629   int alloc_concomp_cnt = 0;
00630 
00631   // neighbors to check
00632   const int nbr_cnt = 4;
00633 
00634   // relative coordinates of nbrs
00635   int x_del[nbr_cnt] = {-1, 0, 1, -1},
00636     y_del[nbr_cnt] = {-1, -1, -1, 0};
00637 
00638 
00639   for (y = 0; y < hgt_; y++) {
00640     for (x = 0; x < wid_; x++) {
00641       // is this a foreground pix
00642       if (line_buff_[y][x] != 0xff) {
00643         int master_concomp_id = 0;
00644         ConComp *master_concomp = NULL;
00645 
00646         // checkout the nbrs
00647         for (int nbr = 0; nbr < nbr_cnt; nbr++) {
00648           x_nbr = x + x_del[nbr];
00649           y_nbr = y + y_del[nbr];
00650 
00651           if (x_nbr < 0 || y_nbr < 0 || x_nbr >= wid_ || y_nbr >= hgt_) {
00652             continue;
00653           }
00654 
00655           // is this nbr a foreground pix
00656           if (line_buff_[y_nbr][x_nbr] != 0xff) {
00657             // get its concomp ID
00658             concomp_id = out_bmp_array[y_nbr][x_nbr];
00659 
00660             // this should not happen
00661             if (concomp_id < 1 || concomp_id > alloc_concomp_cnt) {
00662               fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): illegal "
00663                       "connected component id: %d\n", concomp_id);
00664               FreeBmpBuffer(out_bmp_array);
00665               delete []concomp_array;
00666               return NULL;
00667             }
00668 
00669             // if we has previously found a component then merge the two
00670             // and delete the latest one
00671             if (master_concomp != NULL && concomp_id != master_concomp_id) {
00672               // relabel all the pts
00673               ConCompPt *pt_ptr = concomp_array[concomp_id - 1]->Head();
00674               while (pt_ptr != NULL) {
00675                 out_bmp_array[pt_ptr->y()][pt_ptr->x()] = master_concomp_id;
00676                 pt_ptr = pt_ptr->Next();
00677               }
00678 
00679               // merge the two concomp
00680               if (!master_concomp->Merge(concomp_array[concomp_id - 1])) {
00681                 fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not "
00682                         "merge connected component: %d\n", concomp_id);
00683                 FreeBmpBuffer(out_bmp_array);
00684                 delete []concomp_array;
00685                 return NULL;
00686               }
00687 
00688               // delete the merged concomp
00689               delete concomp_array[concomp_id - 1];
00690               concomp_array[concomp_id - 1] = NULL;
00691             } else {
00692               // this is the first concomp we encounter
00693               master_concomp_id = concomp_id;
00694               master_concomp = concomp_array[master_concomp_id - 1];
00695 
00696               out_bmp_array[y][x] = master_concomp_id;
00697 
00698               if (!master_concomp->Add(x, y)) {
00699                 fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not "
00700                         "add connected component (%d,%d)\n", x, y);
00701                 FreeBmpBuffer(out_bmp_array);
00702                 delete []concomp_array;
00703                 return NULL;
00704               }
00705             }
00706           }  // foreground nbr
00707         }  // nbrs
00708 
00709         // if there was no foreground pix, then create a new concomp
00710         if (master_concomp == NULL) {
00711           master_concomp = new ConComp();
00712           if (master_concomp == NULL || master_concomp->Add(x, y) == false) {
00713             fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not "
00714                     "allocate or add a connected component\n");
00715             FreeBmpBuffer(out_bmp_array);
00716             delete []concomp_array;
00717             return NULL;
00718           }
00719 
00720           // extend the list of concomps if needed
00721           if ((alloc_concomp_cnt % kConCompAllocChunk) == 0) {
00722             ConComp **temp_con_comp =
00723                 new ConComp *[alloc_concomp_cnt + kConCompAllocChunk];
00724             if (temp_con_comp == NULL) {
00725               fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not "
00726                       "extend array of connected components\n");
00727               FreeBmpBuffer(out_bmp_array);
00728               delete []concomp_array;
00729               return NULL;
00730             }
00731 
00732             if (alloc_concomp_cnt > 0) {
00733               memcpy(temp_con_comp, concomp_array,
00734                      alloc_concomp_cnt * sizeof(*concomp_array));
00735 
00736               delete []concomp_array;
00737             }
00738 
00739             concomp_array = temp_con_comp;
00740           }
00741 
00742           concomp_array[alloc_concomp_cnt++] = master_concomp;
00743           out_bmp_array[y][x] = alloc_concomp_cnt;
00744         }
00745       }  // foreground pix
00746     }  // x
00747   }  // y
00748 
00749   // free the concomp bmp
00750   FreeBmpBuffer(out_bmp_array);
00751 
00752   if (alloc_concomp_cnt > 0 && concomp_array != NULL) {
00753     // scan the array of connected components and color
00754     // the o/p buffer with the corresponding concomps
00755     (*concomp_cnt) = 0;
00756     ConComp *concomp = NULL;
00757 
00758     for (int concomp_idx = 0; concomp_idx < alloc_concomp_cnt; concomp_idx++) {
00759       concomp = concomp_array[concomp_idx];
00760 
00761       // found a concomp
00762       if (concomp != NULL) {
00763         // add the connected component if big enough
00764         if (concomp->PtCnt() > min_size) {
00765           concomp->SetLeftMost(true);
00766           concomp->SetRightMost(true);
00767           concomp->SetID((*concomp_cnt));
00768           concomp_array[(*concomp_cnt)++] = concomp;
00769         } else {
00770           delete concomp;
00771         }
00772       }
00773     }
00774   }
00775 
00776   return concomp_array;
00777 }
00778 
00779 // precompute the tan table to speedup deslanting
00780 bool Bmp8::ComputeTanTable() {
00781   int ang_idx;
00782   float ang_val;
00783 
00784   // alloc memory for tan table
00785   delete []tan_table_;
00786   tan_table_ = new float[kDeslantAngleCount];
00787   if (tan_table_ == NULL) {
00788     return false;
00789   }
00790 
00791   for (ang_idx = 0, ang_val = kMinDeslantAngle;
00792        ang_idx < kDeslantAngleCount; ang_idx++) {
00793     tan_table_[ang_idx] = tan(ang_val * M_PI / 180.0f);
00794     ang_val += kDeslantAngleDelta;
00795   }
00796 
00797   return true;
00798 }
00799 
00800 // generates a deslanted bitmap from the passed bitmap.
00801 bool Bmp8::Deslant() {
00802   int x;
00803   int y;
00804   int des_x;
00805   int des_y;
00806   int ang_idx;
00807   int best_ang;
00808   int min_des_x;
00809   int max_des_x;
00810   int des_wid;
00811 
00812   // only do deslanting if bitmap is wide enough
00813   // otherwise it slant estimate might not be reliable
00814   if (wid_ < (hgt_ * 2)) {
00815     return true;
00816   }
00817 
00818   // compute tan table if needed
00819   if (tan_table_ == NULL && !ComputeTanTable()) {
00820     return false;
00821   }
00822 
00823   // compute min and max values for x after deslant
00824   min_des_x = static_cast<int>(0.5f + (hgt_ - 1) * tan_table_[0]);
00825   max_des_x = (wid_ - 1) +
00826       static_cast<int>(0.5f + (hgt_ - 1) * tan_table_[kDeslantAngleCount - 1]);
00827 
00828   des_wid = max_des_x - min_des_x + 1;
00829 
00830   // alloc memory for histograms
00831   int **angle_hist = new int*[kDeslantAngleCount];
00832   for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
00833     angle_hist[ang_idx] = new int[des_wid];
00834     if (angle_hist[ang_idx] == NULL) {
00835       delete[] angle_hist;
00836       return false;
00837     }
00838     memset(angle_hist[ang_idx], 0, des_wid * sizeof(*angle_hist[ang_idx]));
00839   }
00840 
00841   // compute histograms
00842   for (y = 0; y < hgt_; y++) {
00843     for (x = 0; x < wid_; x++) {
00844       // find a non-bkgrnd pixel
00845       if (line_buff_[y][x] != 0xff) {
00846         des_y = hgt_ - y - 1;
00847         // stamp all histograms
00848         for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
00849           des_x = x + static_cast<int>(0.5f + (des_y * tan_table_[ang_idx]));
00850           if (des_x >= min_des_x && des_x <= max_des_x) {
00851             angle_hist[ang_idx][des_x - min_des_x]++;
00852           }
00853         }
00854       }
00855     }
00856   }
00857 
00858   // find the histogram with the lowest entropy
00859   float entropy;
00860   double best_entropy = 0.0f;
00861   double norm_val;
00862 
00863   best_ang = -1;
00864   for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
00865     entropy = 0.0f;
00866 
00867     for (x = min_des_x; x <= max_des_x; x++) {
00868       if (angle_hist[ang_idx][x - min_des_x] > 0) {
00869         norm_val = (1.0f * angle_hist[ang_idx][x - min_des_x] / hgt_);
00870         entropy += (-1.0f * norm_val * log(norm_val));
00871       }
00872     }
00873 
00874     if (best_ang == -1 || entropy < best_entropy) {
00875       best_ang = ang_idx;
00876       best_entropy = entropy;
00877     }
00878 
00879     // free the histogram
00880     delete[] angle_hist[ang_idx];
00881   }
00882   delete[] angle_hist;
00883 
00884   // deslant
00885   if (best_ang != -1) {
00886     unsigned char **dest_lines;
00887     int old_wid = wid_;
00888 
00889     // create a new buffer
00890     wid_ = des_wid;
00891     dest_lines = CreateBmpBuffer();
00892     if (dest_lines == NULL) {
00893       return false;
00894     }
00895 
00896     for (y = 0; y < hgt_; y++) {
00897       for (x = 0; x < old_wid; x++) {
00898         // find a non-bkgrnd pixel
00899         if (line_buff_[y][x] != 0xff) {
00900           des_y = hgt_ - y - 1;
00901           // compute new pos
00902           des_x = x + static_cast<int>(0.5f + (des_y * tan_table_[best_ang]));
00903           dest_lines[y][des_x - min_des_x] = 0;
00904         }
00905       }
00906     }
00907 
00908     // free old buffer
00909     FreeBmpBuffer(line_buff_);
00910     line_buff_ = dest_lines;
00911   }
00912   return true;
00913 }
00914 
00915 // Load dimensions & contents of bitmap from raw data
00916 bool Bmp8::LoadFromCharDumpFile(unsigned char **raw_data_ptr) {
00917   unsigned short wid;
00918   unsigned short hgt;
00919   unsigned short x;
00920   unsigned short y;
00921   unsigned char *raw_data = (*raw_data_ptr);
00922   int buf_size;
00923   int pix;
00924   unsigned int val32;
00925 
00926   // read and check 32 bit marker
00927   memcpy(&val32, raw_data, sizeof(val32));
00928   raw_data += sizeof(val32);
00929 
00930   if (val32 != kMagicNumber) {
00931     return false;
00932   }
00933 
00934   // read wid and hgt
00935   memcpy(&wid, raw_data, sizeof(wid));
00936   raw_data += sizeof(wid);
00937 
00938   memcpy(&hgt, raw_data, sizeof(hgt));
00939   raw_data += sizeof(hgt);
00940 
00941   // read buf size
00942   memcpy(&buf_size, raw_data, sizeof(buf_size));
00943   raw_data += sizeof(buf_size);
00944 
00945   // validate buf size: for now, only 3 channel (RBG) is supported
00946   if (buf_size != (3 * wid * hgt)) {
00947     return false;
00948   }
00949 
00950   wid_ = wid;
00951   hgt_ = hgt;
00952 
00953   line_buff_ = CreateBmpBuffer();
00954   if (line_buff_ == NULL) {
00955     return false;
00956   }
00957 
00958   // copy the data
00959   for (y = 0, pix = 0; y < hgt_; y++) {
00960     for (x = 0; x < wid_; x++, pix += 3) {
00961       // for now we only support gray scale,
00962       // so we expect R = G = B, it this is not the case, bail out
00963       if  (raw_data[pix] != raw_data[pix + 1] ||
00964            raw_data[pix] != raw_data[pix + 2]) {
00965         return false;
00966       }
00967 
00968       line_buff_[y][x] = raw_data[pix];
00969     }
00970   }
00971 
00972   (*raw_data_ptr) = raw_data + buf_size;
00973   return true;
00974 }
00975 
00976 float Bmp8::ForegroundRatio() const {
00977   int fore_cnt = 0;
00978 
00979   if (wid_ <= 0 || hgt_ <= 0) {
00980     return 1.0;
00981   }
00982 
00983   for (int y = 0; y < hgt_; y++) {
00984     for (int x = 0; x < wid_; x++) {
00985       fore_cnt += (line_buff_[y][x] == 0xff ? 0 : 1);
00986     }
00987   }
00988 
00989   return (1.0 * (fore_cnt / hgt_) / wid_);
00990 }
00991 
00992 // generates a deslanted bitmap from the passed bitmap
00993 bool Bmp8::HorizontalDeslant(double *deslant_angle) {
00994   int x;
00995   int y;
00996   int des_y;
00997   int ang_idx;
00998   int best_ang;
00999   int min_des_y;
01000   int max_des_y;
01001   int des_hgt;
01002 
01003   // compute tan table if necess.
01004   if (tan_table_ == NULL && !ComputeTanTable()) {
01005     return false;
01006   }
01007 
01008   // compute min and max values for x after deslant
01009   min_des_y = min(0, static_cast<int>((wid_ - 1) * tan_table_[0]));
01010   max_des_y = (hgt_ - 1) +
01011       max(0, static_cast<int>((wid_ - 1) * tan_table_[kDeslantAngleCount - 1]));
01012 
01013   des_hgt = max_des_y - min_des_y + 1;
01014 
01015   // alloc memory for histograms
01016   int **angle_hist = new int*[kDeslantAngleCount];
01017   for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
01018     angle_hist[ang_idx] = new int[des_hgt];
01019     if (angle_hist[ang_idx] == NULL) {
01020       delete[] angle_hist;
01021       return false;
01022     }
01023     memset(angle_hist[ang_idx], 0, des_hgt * sizeof(*angle_hist[ang_idx]));
01024   }
01025 
01026   // compute histograms
01027   for (y = 0; y < hgt_; y++) {
01028     for (x = 0; x < wid_; x++) {
01029       // find a non-bkgrnd pixel
01030       if (line_buff_[y][x] != 0xff) {
01031         // stamp all histograms
01032         for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
01033           des_y = y - static_cast<int>(x * tan_table_[ang_idx]);
01034           if (des_y >= min_des_y && des_y <= max_des_y) {
01035             angle_hist[ang_idx][des_y - min_des_y]++;
01036           }
01037         }
01038       }
01039     }
01040   }
01041 
01042   // find the histogram with the lowest entropy
01043   float entropy;
01044   float best_entropy =  0.0f;
01045   float norm_val;
01046 
01047   best_ang = -1;
01048   for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) {
01049     entropy = 0.0f;
01050 
01051     for (y = min_des_y; y <= max_des_y; y++) {
01052       if (angle_hist[ang_idx][y - min_des_y] > 0) {
01053         norm_val = (1.0f * angle_hist[ang_idx][y - min_des_y] / wid_);
01054         entropy += (-1.0f * norm_val * log(norm_val));
01055       }
01056     }
01057 
01058     if (best_ang == -1 || entropy < best_entropy) {
01059       best_ang = ang_idx;
01060       best_entropy = entropy;
01061     }
01062 
01063     // free the histogram
01064     delete[] angle_hist[ang_idx];
01065   }
01066   delete[] angle_hist;
01067 
01068   (*deslant_angle) = 0.0;
01069 
01070   // deslant
01071   if (best_ang != -1) {
01072     unsigned char **dest_lines;
01073     int old_hgt = hgt_;
01074 
01075     // create a new buffer
01076     min_des_y = min(0, static_cast<int>((wid_ - 1) * -tan_table_[best_ang]));
01077     max_des_y = (hgt_ - 1) +
01078         max(0, static_cast<int>((wid_ - 1) * -tan_table_[best_ang]));
01079     hgt_ = max_des_y - min_des_y + 1;
01080     dest_lines = CreateBmpBuffer();
01081     if (dest_lines == NULL) {
01082       return false;
01083     }
01084 
01085     for (y = 0; y < old_hgt; y++) {
01086       for (x = 0; x < wid_; x++) {
01087         // find a non-bkgrnd pixel
01088         if (line_buff_[y][x] != 0xff) {
01089           // compute new pos
01090           des_y = y - static_cast<int>((x * tan_table_[best_ang]));
01091           dest_lines[des_y - min_des_y][x] = 0;
01092         }
01093       }
01094     }
01095 
01096     // free old buffer
01097     FreeBmpBuffer(line_buff_);
01098     line_buff_ = dest_lines;
01099 
01100     (*deslant_angle) = kMinDeslantAngle + (best_ang * kDeslantAngleDelta);
01101   }
01102 
01103   return true;
01104 }
01105 
01106 float Bmp8::MeanHorizontalHistogramEntropy() const {
01107   float entropy = 0.0f;
01108 
01109   // compute histograms
01110   for (int y = 0; y < hgt_; y++) {
01111     int pix_cnt = 0;
01112 
01113     for (int x = 0; x < wid_; x++) {
01114       // find a non-bkgrnd pixel
01115       if (line_buff_[y][x] != 0xff) {
01116         pix_cnt++;
01117       }
01118     }
01119 
01120     if (pix_cnt > 0) {
01121       float norm_val = (1.0f * pix_cnt / wid_);
01122       entropy += (-1.0f * norm_val * log(norm_val));
01123     }
01124   }
01125 
01126   return entropy / hgt_;
01127 }
01128 
01129 int *Bmp8::HorizontalHistogram() const {
01130   int *hist = new int[hgt_];
01131   if (hist == NULL) {
01132     return NULL;
01133   }
01134 
01135   // compute histograms
01136   for (int y = 0; y < hgt_; y++) {
01137     hist[y] = 0;
01138 
01139     for (int x = 0; x < wid_; x++) {
01140       // find a non-bkgrnd pixel
01141       if (line_buff_[y][x] != 0xff) {
01142         hist[y]++;
01143       }
01144     }
01145   }
01146 
01147   return hist;
01148 }
01149 
01150 }  // namespace tesseract