Tesseract  3.02
tesseract-ocr/textord/cjkpitch.cpp
Go to the documentation of this file.
00001 
00002 // File:        cjkpitch.cpp
00003 // Description: Code to determine fixed pitchness and the pitch if fixed,
00004 //              for CJK text.
00005 // Copyright 2011 Google Inc. All Rights Reserved.
00006 // Author: takenaka@google.com (Hiroshi Takenaka)
00007 // Created:     Mon Jun 27 12:48:35 JST 2011
00008 //
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 #include "cjkpitch.h"
00021 #include "genericvector.h"
00022 #include "ndminx.h"
00023 #include "topitch.h"
00024 #include "tovars.h"
00025 
00026 BOOL_VAR(textord_space_size_is_variable, FALSE,
00027          "If true, word delimiter spaces are assumed to have "
00028          "variable width, even though characters have fixed pitch.");
00029 
00030 namespace {
00031 
00032 // Allow +/-10% error for character pitch / body size.
00033 static const float kFPTolerance = 0.1;
00034 
00035 // Minimum ratio of "good" character pitch for a row to be considered
00036 // to be fixed-pitch.
00037 static const float kFixedPitchThreshold = 0.35;
00038 
00039 // rank statistics for a small collection of float values.
00040 class SimpleStats {
00041  public:
00042   SimpleStats(): finalized_(false), values_() { }
00043   ~SimpleStats() { }
00044 
00045   void Clear() {
00046     values_.clear();
00047     finalized_ = false;
00048   }
00049 
00050   void Add(float value) {
00051     values_.push_back(value);
00052     finalized_ = false;
00053   }
00054 
00055   void Finish() {
00056     values_.sort(float_compare);
00057     finalized_ = true;
00058   }
00059 
00060   float ile(double frac) {
00061     if (!finalized_) Finish();
00062     if (values_.empty()) return 0.0;
00063     if (frac >= 1.0) return values_.back();
00064     if (frac <= 0.0 || values_.size() == 1) return values_[0];
00065     int index = static_cast<int>((values_.size() - 1) * frac);
00066     float reminder = (values_.size() - 1) * frac - index;
00067 
00068     return values_[index] * (1.0 - reminder) +
00069         values_[index + 1] * reminder;
00070   }
00071 
00072   float median() {
00073     return ile(0.5);
00074   }
00075 
00076   float maximum() {
00077     if (!finalized_) Finish();
00078     if (values_.empty()) return 0.0;
00079     return values_.back();
00080   }
00081 
00082   float minimum() {
00083     if (!finalized_) Finish();
00084     if (values_.empty()) return 0.0;
00085     return values_[0];
00086   }
00087 
00088   int size() const {
00089     return values_.size();
00090   }
00091 
00092  private:
00093   static int float_compare(const void* a, const void* b) {
00094     const float* f_a = reinterpret_cast<const float*>(a);
00095     const float* f_b = reinterpret_cast<const float*>(b);
00096     return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0);
00097   }
00098 
00099   bool finalized_;
00100   GenericVector<float> values_;
00101 };
00102 
00103 // statistics for a small collection of float pairs (x, y).
00104 // EstimateYFor(x, r) returns the estimated y at x, based on
00105 // existing samples between x*(1-r) ~ x*(1+r).
00106 class LocalCorrelation {
00107  public:
00108   struct float_pair {
00109     float x, y;
00110     int vote;
00111   };
00112 
00113   LocalCorrelation(): finalized_(false) { }
00114   ~LocalCorrelation() { }
00115 
00116   void Finish() {
00117     values_.sort(float_pair_compare);
00118     finalized_ = true;
00119   }
00120 
00121   void Clear() {
00122     finalized_ = false;
00123   }
00124 
00125   void Add(float x, float y, int v) {
00126     struct float_pair value;
00127     value.x = x;
00128     value.y = y;
00129     value.vote = v;
00130     values_.push_back(value);
00131     finalized_ = false;
00132   }
00133 
00134   float EstimateYFor(float x, float r) {
00135     ASSERT_HOST(finalized_);
00136     int start = 0, end = values_.size();
00137     // Because the number of samples (used_) is assumed to be small,
00138     // just use linear search to find values within the range.
00139     while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
00140     while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
00141 
00142     // Fall back to the global average if there are no data within r
00143     // of x.
00144     if (start >= end) {
00145       start = 0;
00146       end = values_.size();
00147     }
00148 
00149     // Compute weighted average of the values.
00150     float rc = 0;
00151     int vote = 0;
00152     for (int i = start; i < end; i++) {
00153       rc += values_[i].vote * x * values_[i].y / values_[i].x;
00154       vote += values_[i].vote;
00155     }
00156 
00157     return rc / vote;
00158   }
00159 
00160  private:
00161   static int float_pair_compare(const void* a, const void* b) {
00162     const float_pair* f_a = reinterpret_cast<const float_pair*>(a);
00163     const float_pair* f_b = reinterpret_cast<const float_pair*>(b);
00164     return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
00165   }
00166 
00167   bool finalized_;
00168   GenericVector<struct float_pair> values_;
00169 };
00170 
00171 // Class to represent a character on a fixed pitch row.  A FPChar may
00172 // consist of multiple blobs (BLOBNBOX's).
00173 class FPChar {
00174  public:
00175   enum Alignment {
00176     ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
00177   };
00178 
00179   FPChar(): box_(), real_body_(),
00180             from_(NULL), to_(NULL), num_blobs_(0), max_gap_(0),
00181             final_(false), alignment_(ALIGN_UNKNOWN),
00182             merge_to_prev_(false), delete_flag_(false) {
00183   }
00184 
00185   // Initialize from blob.
00186   void Init(BLOBNBOX *blob) {
00187     box_ = blob->bounding_box();
00188     real_body_ = box_;
00189     from_ = to_ = blob;
00190     num_blobs_ = 1;
00191   }
00192 
00193   // Merge this character with "next". The "next" character should
00194   // consist of succeeding blobs on the same row.
00195   void Merge(const FPChar &next) {
00196     int gap = real_body_.x_gap(next.real_body_);
00197     if (gap > max_gap_) max_gap_ = gap;
00198 
00199     box_ += next.box_;
00200     real_body_ += next.real_body_;
00201     to_ = next.to_;
00202     num_blobs_ += next.num_blobs_;
00203   }
00204 
00205   // Accessors.
00206   const TBOX &box() const { return box_; }
00207   void set_box(const TBOX &box) {
00208     box_ = box;
00209   }
00210   const TBOX &real_body() const { return real_body_; }
00211 
00212   bool is_final() const { return final_; }
00213   void set_final(bool flag) {
00214     final_ = flag;
00215   }
00216 
00217   const Alignment& alignment() const {
00218     return alignment_;
00219   }
00220   void set_alignment(Alignment alignment) {
00221     alignment_ = alignment;
00222   }
00223 
00224   bool merge_to_prev() const {
00225     return merge_to_prev_;
00226   }
00227   void set_merge_to_prev(bool flag) {
00228     merge_to_prev_ = flag;
00229   }
00230 
00231   bool delete_flag() const {
00232     return delete_flag_;
00233   }
00234   void set_delete_flag(bool flag) {
00235     delete_flag_ = flag;
00236   }
00237 
00238   int max_gap() const {
00239     return max_gap_;
00240   }
00241 
00242   int num_blobs() const {
00243     return num_blobs_;
00244   }
00245 
00246  private:
00247   TBOX box_;  // Rectangle region considered to be occupied by this
00248   // character.  It could be bigger than the bounding box.
00249   TBOX real_body_;  // Real bounding box of this character.
00250   BLOBNBOX *from_;  // The first blob of this character.
00251   BLOBNBOX *to_;  // The last blob of this character.
00252   int num_blobs_;  // Number of blobs that belong to this character.
00253   int max_gap_;  // Maximum x gap between the blobs.
00254 
00255   bool final_;  // True if alignment/fragmentation decision for this
00256   // character is finalized.
00257 
00258   Alignment alignment_;  // Alignment status.
00259   bool merge_to_prev_;  // True if this is a fragmented blob that
00260   // needs to be merged to the previous
00261   // character.
00262 
00263   int delete_flag_;  // True if this character is merged to another
00264   // one and needs to be deleted.
00265 };
00266 
00267 // Class to represent a fixed pitch row, as a linear collection of
00268 // FPChar's.
00269 class FPRow {
00270  public:
00271   FPRow() : pitch_(0.0f), estimated_pitch_(0.0f),
00272             all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(),
00273             heights_(), characters_(), real_row_(NULL) {
00274   }
00275 
00276   ~FPRow() { }
00277 
00278   // Initialize from TD_ROW.
00279   void Init(TO_ROW *row);
00280 
00281   // Estimate character pitch of this row, based on current alignment
00282   // status of underlying FPChar's.  The argument pass1 can be set to
00283   // true if the function is called after Pass1Analyze(), to eliminate
00284   // some redundant computation.
00285   void EstimatePitch(bool pass1);
00286 
00287   // Check each character if it has good character pitches between its
00288   // predecessor and its successor and set its alignment status.  If
00289   // we already calculated the estimated pitch for this row, the value
00290   // is used.  If we didn't, a character is considered to be good, if
00291   // the pitches between its predecessor and its successor are almost
00292   // equal.
00293   void Pass1Analyze();
00294 
00295   // Find characters that fit nicely into one imaginary body next to a
00296   // character which is already finalized. Then mark them as character
00297   // fragments.
00298   bool Pass2Analyze();
00299 
00300   // Merge FPChars marked as character fragments into one.
00301   void MergeFragments();
00302 
00303   // Finalize characters that are already large enough and cannot be
00304   // merged with others any more.
00305   void FinalizeLargeChars();
00306 
00307   // Ouput pitch estimation results to attributes of TD_ROW.
00308   void OutputEstimations();
00309 
00310   void DebugOutputResult(int row_index);
00311 
00312   int good_pitches() {
00313     return good_pitches_.size();
00314   }
00315 
00316   int good_gaps() {
00317     return good_gaps_.size();
00318   }
00319 
00320   float pitch() {
00321     return pitch_;
00322   }
00323 
00324   float estimated_pitch() {
00325     return estimated_pitch_;
00326   }
00327 
00328   void set_estimated_pitch(float v) {
00329     estimated_pitch_ = v;
00330   }
00331 
00332   float height() {
00333     return height_;
00334   }
00335 
00336   float height_pitch_ratio() {
00337     if (good_pitches_.size() < 2) return -1.0;
00338     return height_ / good_pitches_.median();
00339   }
00340 
00341   float gap() {
00342     return gap_;
00343   }
00344 
00345   int num_chars() {
00346     return characters_.size();
00347   }
00348   FPChar *character(int i) {
00349     return &characters_[i];
00350   }
00351 
00352   const TBOX &box(int i) {
00353     return characters_[i].box();
00354   }
00355 
00356   const TBOX &real_body(int i) {
00357     return characters_[i].real_body();
00358   }
00359 
00360   bool is_box_modified(int i) {
00361     return !(characters_[i].box() == characters_[i].real_body());
00362   }
00363 
00364   float center_x(int i) {
00365     return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
00366   }
00367 
00368   bool is_final(int i) {
00369     return characters_[i].is_final();
00370   }
00371 
00372   void finalize(int i) {
00373     characters_[i].set_final(true);
00374   }
00375 
00376   bool is_good(int i) {
00377     return characters_[i].alignment() == FPChar::ALIGN_GOOD;
00378   }
00379 
00380   bool is_bad(int i) {
00381     return characters_[i].alignment() == FPChar::ALIGN_BAD;
00382   }
00383 
00384   bool is_unknown(int i) {
00385     return characters_[i].alignment() == FPChar::ALIGN_UNKNOWN;
00386   }
00387 
00388   void mark_good(int i) {
00389     characters_[i].set_alignment(FPChar::ALIGN_GOOD);
00390   }
00391 
00392   void mark_bad(int i) {
00393     characters_[i].set_alignment(FPChar::ALIGN_BAD);
00394   }
00395 
00396   void clear_alignment(int i) {
00397     characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
00398   }
00399 
00400  private:
00401   static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
00402     if (MIN(box1.width(), box2.width()) == 0) return 0.0;
00403     return -box1.x_gap(box2) / (float)MIN(box1.width(), box2.width());
00404   }
00405 
00406   static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
00407     return x_overlap_fraction(box1, box2) > 0.9;
00408   }
00409 
00410   static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
00411     if (MIN(box1.width(), box2.width()) == 0) return false;
00412     int overlap = -box1.x_gap(box2);
00413     return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
00414   }
00415 
00416   static float box_pitch(const TBOX& ref, const TBOX& box) {
00417     return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
00418   }
00419 
00420   // Check if two neighboring characters satisfy the fixed pitch model.
00421   static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
00422     // Character box shouldn't exceed pitch.
00423     if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
00424         box2.width() >= pitch * (1.0 + kFPTolerance) ||
00425         box1.height() >= pitch * (1.0 + kFPTolerance) ||
00426         box2.height() >= pitch * (1.0 + kFPTolerance)) return false;
00427 
00428     const float real_pitch = box_pitch(box1, box2);
00429     if (abs(real_pitch - pitch) < pitch * kFPTolerance) return true;
00430 
00431     if (textord_space_size_is_variable) {
00432       // Hangul characters usually have fixed pitch, but words are
00433       // delimited by space which can be narrower than characters.
00434       if (real_pitch > pitch && real_pitch < pitch * 2.0 &&
00435           real_pitch - box1.x_gap(box2) < pitch) {
00436         return true;
00437       }
00438     }
00439     return false;
00440   }
00441 
00442   static bool is_interesting_blob(const BLOBNBOX *blob) {
00443     return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
00444   }
00445 
00446   // Cleanup chars that are already merged to others.
00447   void DeleteChars() {
00448     int index = 0;
00449     for (int i = 0; i < characters_.size(); ++i) {
00450       if (!characters_[i].delete_flag()) {
00451         if (index != i) characters_[index] = characters_[i];
00452         index++;
00453       }
00454     }
00455     characters_.truncate(index);
00456   }
00457 
00458   float pitch_;  // Character pitch.
00459   float estimated_pitch_;  // equal to pitch_ if pitch_ is considered
00460   // to be good enough.
00461   float height_;  // Character height.
00462   float gap_;  // Minimum gap between characters.
00463 
00464   // Pitches between any two successive characters.
00465   SimpleStats all_pitches_;
00466   // Gaps between any two successive characters.
00467   SimpleStats all_gaps_;
00468   // Pitches between any two successive characters that are consistent
00469   // with the fixed pitch model.
00470   SimpleStats good_pitches_;
00471   // Gaps between any two successive characters that are consistent
00472   // with the fixed pitch model.
00473   SimpleStats good_gaps_;
00474 
00475   SimpleStats heights_;
00476 
00477   GenericVector<FPChar> characters_;
00478   TO_ROW *real_row_;  // Underlying TD_ROW for this row.
00479 };
00480 
00481 void FPRow::Init(TO_ROW *row) {
00482   ASSERT_HOST(row != NULL);
00483   ASSERT_HOST(row->xheight > 0);
00484   real_row_ = row;
00485   real_row_->pitch_decision = PITCH_CORR_PROP;  // Default decision.
00486 
00487   BLOBNBOX_IT blob_it = row->blob_list();
00488   // Initialize characters_ and compute the initial estimation of
00489   // character height.
00490   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00491     if (is_interesting_blob(blob_it.data())) {
00492       FPChar fp_char;
00493       fp_char.Init(blob_it.data());
00494       // Merge unconditionally if two blobs overlap.
00495       if (!characters_.empty() &&
00496           significant_overlap(fp_char.box(), characters_.back().box())) {
00497         characters_.back().Merge(fp_char);
00498       } else {
00499         characters_.push_back(fp_char);
00500       }
00501       TBOX bound = blob_it.data()->bounding_box();
00502       if (bound.height() * 3.0 > bound.width()) {
00503         heights_.Add(bound.height());
00504       }
00505     }
00506   }
00507   heights_.Finish();
00508   height_ = heights_.ile(0.875);
00509 }
00510 
00511 void FPRow::OutputEstimations() {
00512   if (good_pitches_.size() == 0) {
00513     pitch_ = 0.0f;
00514     real_row_->pitch_decision = PITCH_CORR_PROP;
00515     return;
00516   }
00517 
00518   pitch_ = good_pitches_.median();
00519   real_row_->fixed_pitch = pitch_;
00520   // good_gaps_.ile(0.125) can be large if most characters on the row
00521   // are skinny. Use pitch_ - height_ instead if it's smaller, but
00522   // positive.
00523   real_row_->kern_size = real_row_->pr_nonsp =
00524       MIN(good_gaps_.ile(0.125), MAX(pitch_ - height_, 0));
00525   real_row_->body_size = pitch_ - real_row_->kern_size;
00526 
00527   if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
00528     // If more than half of the characters of a line don't fit to the
00529     // fixed pitch model, consider the line to be propotional. 50%
00530     // seems to be a good threshold in practice as well.
00531     // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
00532     // real_row_ as a partial estimation result and try to use them in the
00533     // normalization process.
00534     real_row_->pitch_decision = PITCH_CORR_PROP;
00535     return;
00536   } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
00537     real_row_->pitch_decision = PITCH_DEF_FIXED;
00538   } else {
00539     real_row_->pitch_decision = PITCH_CORR_FIXED;
00540   }
00541 
00542   real_row_->space_size = real_row_->pr_space = pitch_;
00543   // Set min_space to 50% of character pitch so that we can break CJK
00544   // text at a half-width space after punctuation.
00545   real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
00546 
00547   // Don't consider a quarter space as a real space, because it's used
00548   // for line justification in traditional Japanese books.
00549   real_row_->max_nonspace = MAX(pitch_ * 0.25 + good_gaps_.minimum(),
00550                                 (double)good_gaps_.ile(0.875));
00551 
00552   int space_threshold =
00553       MIN((real_row_->max_nonspace + real_row_->min_space) / 2,
00554           real_row_->xheight);
00555 
00556   // Make max_nonspace larger than any intra-character gap so that
00557   // make_prop_words() won't break a row at the middle of a character.
00558   for (int i = 0; i < num_chars(); ++i) {
00559     if (characters_[i].max_gap() > real_row_->max_nonspace) {
00560       real_row_->max_nonspace = characters_[i].max_gap();
00561     }
00562   }
00563   real_row_->space_threshold =
00564       MIN((real_row_->max_nonspace + real_row_->min_space) / 2,
00565           real_row_->xheight);
00566   real_row_->used_dm_model = false;
00567 
00568   // Setup char_cells.
00569   ICOORDELT_IT cell_it = &real_row_->char_cells;
00570   ICOORDELT *cell = new ICOORDELT(real_body(0).left(), 0);
00571   cell_it.add_after_then_move(cell);
00572 
00573   int right = real_body(0).right();
00574   for (int i = 1; i < num_chars(); ++i) {
00575     // Put a word break if gap between two characters is bigger than
00576     // space_threshold.  Don't break if none of two characters
00577     // couldn't be "finalized", because maybe they need to be merged
00578     // to one character.
00579     if ((is_final(i - 1) || is_final(i)) &&
00580         real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
00581       cell = new ICOORDELT(right + 1, 0);
00582       cell_it.add_after_then_move(cell);
00583       while (right + pitch_ < box(i).left()) {
00584         right += pitch_;
00585         cell = new ICOORDELT(right + 1, 0);
00586         cell_it.add_after_then_move(cell);
00587       }
00588       right = box(i).left();
00589     }
00590     cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
00591     cell_it.add_after_then_move(cell);
00592     right = real_body(i).right();
00593   }
00594 
00595   cell = new ICOORDELT(right + 1, 0);
00596   cell_it.add_after_then_move(cell);
00597 
00598   // TODO(takenaka): add code to store alignment/fragmentation
00599   // information to blobs so that it can be reused later, e.g. in
00600   // recognition phase.
00601 }
00602 
00603 void FPRow::EstimatePitch(bool pass1) {
00604   good_pitches_.Clear();
00605   all_pitches_.Clear();
00606   good_gaps_.Clear();
00607   all_gaps_.Clear();
00608   heights_.Clear();
00609   if (num_chars() == 0) return;
00610 
00611   inT32 cx0, cx1;
00612   bool prev_was_good = is_good(0);
00613   cx0 = center_x(0);
00614 
00615   heights_.Add(box(0).height());
00616   for (int i = 1; i < num_chars(); i++) {
00617     cx1 = center_x(i);
00618     inT32 pitch = cx1 - cx0;
00619     inT32 gap = MAX(0, real_body(i - 1).x_gap(real_body(i)));
00620 
00621     heights_.Add(box(i).height());
00622     // Ignore if the pitch is too close.  But don't ignore wide pitch
00623     // may be the result of large tracking.
00624     if (pitch > height_ * 0.5) {
00625       all_pitches_.Add(pitch);
00626       all_gaps_.Add(gap);
00627       if (is_good(i)) {
00628         // In pass1 (after Pass1Analyze()), all characters marked as
00629         // "good" have a good consistent pitch with their previous
00630         // characters.  However, it's not true in pass2 and a good
00631         // character may have a good pitch only between its successor.
00632         // So we collect only pitch values between two good
00633         // characters. and within tolerance in pass2.
00634         if (pass1 ||
00635             (prev_was_good &&
00636              abs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
00637           good_pitches_.Add(pitch);
00638           if (!is_box_modified(i - 1) && !is_box_modified(i)) {
00639             good_gaps_.Add(gap);
00640           }
00641         }
00642         prev_was_good = true;
00643       } else {
00644         prev_was_good = false;
00645       }
00646     }
00647     cx0 = cx1;
00648   }
00649 
00650   good_pitches_.Finish();
00651   all_pitches_.Finish();
00652   good_gaps_.Finish();
00653   all_gaps_.Finish();
00654   heights_.Finish();
00655 
00656   height_ = heights_.ile(0.875);
00657   if (all_pitches_.size() == 0) {
00658     pitch_ = 0.0f;
00659     gap_ = 0.0f;
00660   } else if (good_pitches_.size() < 2) {
00661     // We don't have enough data to estimate the pitch of this row yet.
00662     // Use median of all pitches as the initial guess.
00663     pitch_ = all_pitches_.median();
00664     ASSERT_HOST(pitch_ > 0.0f);
00665     gap_ = all_gaps_.ile(0.125);
00666   } else {
00667     pitch_ = good_pitches_.median();
00668     ASSERT_HOST(pitch_ > 0.0f);
00669     gap_ = good_gaps_.ile(0.125);
00670   }
00671 }
00672 
00673 void FPRow::DebugOutputResult(int row_index) {
00674   if (num_chars() > 0) {
00675     tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
00676             "space_size=%f, space_threshold=%d, xheight=%f\n",
00677             row_index, (int)(real_row_->pitch_decision),
00678             real_row_->fixed_pitch, real_row_->max_nonspace,
00679             real_row_->space_size, real_row_->space_threshold,
00680             real_row_->xheight);
00681 
00682     for (int i = 0; i < num_chars(); i++) {
00683       tprintf("Char %d: is_final=%d is_good=%d num_blobs=%d: ",
00684               i, is_final(i), is_good(i), character(i)->num_blobs());
00685       box(i).print();
00686     }
00687   }
00688 }
00689 
00690 void FPRow::Pass1Analyze() {
00691   if (num_chars() < 2) return;
00692 
00693   if (estimated_pitch_ > 0.0f) {
00694     for (int i = 2; i < num_chars(); i++) {
00695       if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) &&
00696           is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
00697         mark_good(i - 1);
00698       }
00699     }
00700   } else {
00701     for (int i = 2; i < num_chars(); i++) {
00702       if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) {
00703         mark_good(i - 1);
00704       }
00705     }
00706   }
00707   character(0)->set_alignment(character(1)->alignment());
00708   character(num_chars() - 1)->set_alignment(
00709       character(num_chars() - 2)->alignment());
00710 }
00711 
00712 bool FPRow::Pass2Analyze() {
00713   bool changed = false;
00714   if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
00715     return false;
00716   }
00717   for (int i = 0; i < num_chars(); i++) {
00718     if (is_final(i)) continue;
00719 
00720     FPChar::Alignment alignment = character(i)->alignment();
00721     bool intersecting = false;
00722     bool not_intersecting = false;
00723 
00724     if (i < num_chars() - 1 && is_final(i + 1)) {
00725       // Next character is already finalized. Estimate the imaginary
00726       // body including this character based on the character. Skip
00727       // whitespace if necessary.
00728       bool skipped_whitespaces = false;
00729       float c1 = center_x(i + 1)  - 1.5 * estimated_pitch_;
00730       while (c1 > box(i).right()) {
00731         skipped_whitespaces = true;
00732         c1 -= estimated_pitch_;
00733       }
00734       TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
00735 
00736       // Collect all characters that mostly fit in the region.
00737       // Also, their union height shouldn't be too big.
00738       int j = i;
00739       TBOX merged;
00740       while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
00741              merged.bounding_union(box(j)).height() <
00742              estimated_pitch_ * (1 + kFPTolerance)) {
00743         merged += box(j);
00744         j--;
00745       }
00746 
00747       if (j >= 0 && significant_overlap(ibody, box(j))) {
00748         // character(j) lies on the character boundary and doesn't fit
00749         // well into the imaginary body.
00750         if (!is_final(j)) intersecting = true;
00751       } else {
00752         not_intersecting = true;
00753         if (i - j > 0) {
00754           // Merge character(j+1) ... character(i) because they fit
00755           // into the body nicely.
00756           if (i - j == 1) {
00757             // Only one char in the imaginary body.
00758             if (!skipped_whitespaces) mark_good(i);
00759             // set ibody as bounding box of this character to get
00760             // better pitch analysis result for halfwidth glyphs
00761             // followed by a halfwidth space.
00762             if (box(i).width() <= estimated_pitch_ * 0.5) {
00763               ibody += box(i);
00764               character(i)->set_box(ibody);
00765             }
00766             character(i)->set_merge_to_prev(false);
00767             finalize(i);
00768           } else {
00769             for (int k = i; k > j + 1; k--) {
00770               character(k)->set_merge_to_prev(true);
00771             }
00772           }
00773         }
00774       }
00775     }
00776     if (i > 0 && is_final(i - 1)) {
00777       // Now we repeat everything from the opposite side.  Previous
00778       // character is already finalized. Estimate the imaginary body
00779       // including this character based on the character.
00780       bool skipped_whitespaces = false;
00781       float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
00782       while (c1 < box(i).left()) {
00783         skipped_whitespaces = true;
00784         c1 += estimated_pitch_;
00785       }
00786       TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
00787 
00788       int j = i;
00789       TBOX merged;
00790       while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
00791              merged.bounding_union(box(j)).height() <
00792              estimated_pitch_ * (1 + kFPTolerance)) {
00793         merged += box(j);
00794         j++;
00795       }
00796 
00797       if (j < num_chars() && significant_overlap(ibody, box(j))) {
00798         if (!is_final(j)) intersecting = true;
00799       } else {
00800         not_intersecting = true;
00801         if (j - i > 0) {
00802           if (j - i == 1) {
00803             if (!skipped_whitespaces) mark_good(i);
00804             if (box(i).width() <= estimated_pitch_ * 0.5) {
00805               ibody += box(i);
00806               character(i)->set_box(ibody);
00807             }
00808             character(i)->set_merge_to_prev(false);
00809             finalize(i);
00810           } else {
00811             for (int k = i + 1; k < j; k++) {
00812               character(k)->set_merge_to_prev(true);
00813             }
00814           }
00815         }
00816       }
00817     }
00818 
00819     // This character doesn't fit well into the estimated imaginary
00820     // bodies. Mark it as bad.
00821     if (intersecting && !not_intersecting) mark_bad(i);
00822     if (character(i)->alignment() != alignment ||
00823         character(i)->merge_to_prev()) {
00824       changed = true;
00825     }
00826   }
00827 
00828   return changed;
00829 }
00830 
00831 void FPRow::MergeFragments() {
00832   int last_char = 0;
00833 
00834   for (int j = 0; j < num_chars(); ++j) {
00835     if (character(j)->merge_to_prev()) {
00836       character(last_char)->Merge(*character(j));
00837       character(j)->set_delete_flag(true);
00838       clear_alignment(last_char);
00839       character(j-1)->set_merge_to_prev(false);
00840     } else {
00841       last_char = j;
00842     }
00843   }
00844   DeleteChars();
00845 }
00846 
00847 void FPRow::FinalizeLargeChars() {
00848   float row_pitch = estimated_pitch();
00849   for (int i = 0; i < num_chars(); i++) {
00850     if (is_final(i)) continue;
00851 
00852     // Finalize if both neighbors are finalized. We have no other choice.
00853     if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
00854       finalize(i);
00855       continue;
00856     }
00857 
00858     float cx = center_x(i);
00859     TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
00860     if (i > 0) {
00861       // The preceding character significantly intersects with the
00862       // imaginary body of this character. Let Pass2Analyze() handle
00863       // this case.
00864       if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
00865       if (!is_final(i - 1)) {
00866         TBOX merged = box(i);
00867         merged += box(i - 1);
00868         if (merged.width() < row_pitch) continue;
00869         // This character cannot be finalized yet because it can be
00870         // merged with the previous one.  Again, let Pass2Analyze()
00871         // handle this case.
00872       }
00873     }
00874     if (i < num_chars() - 1) {
00875       if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
00876       if (!is_final(i + 1)) {
00877         TBOX merged = box(i);
00878         merged += box(i + 1);
00879         if (merged.width() < row_pitch) continue;
00880       }
00881     }
00882     finalize(i);
00883   }
00884 
00885   // Update alignment decision.  We only consider finalized characters
00886   // in pass2.  E.g. if a finalized character C has another finalized
00887   // character L on its left and a not-finalized character R on its
00888   // right, we mark C as good if the pitch between C and L is good,
00889   // regardless of the pitch between C and R.
00890   for (int i = 0; i < num_chars(); i++) {
00891     if (!is_final(i)) continue;
00892     bool good_pitch = false;
00893     bool bad_pitch = false;
00894     if (i > 0 && is_final(i - 1)) {
00895       if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
00896         good_pitch = true;
00897       } else {
00898         bad_pitch = true;
00899       }
00900     }
00901     if (i < num_chars() - 1 && is_final(i + 1)) {
00902       if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
00903         good_pitch = true;
00904       } else {
00905         bad_pitch = true;
00906       }
00907     }
00908     if (good_pitch && !bad_pitch) mark_good(i);
00909     else if (!good_pitch && bad_pitch) mark_bad(i);
00910   }
00911 }
00912 
00913 class FPAnalyzer {
00914  public:
00915   FPAnalyzer(): page_tr_(), rows_() { }
00916   ~FPAnalyzer() { }
00917 
00918   void Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
00919 
00920   void Pass1Analyze() {
00921     for (int i = 0; i < rows_.size(); i++) rows_[i].Pass1Analyze();
00922   }
00923 
00924   // Estimate character pitch for each row.  The argument pass1 can be
00925   // set to true if the function is called after Pass1Analyze(), to
00926   // eliminate some redundant computation.
00927   void EstimatePitch(bool pass1);
00928 
00929   bool maybe_fixed_pitch() {
00930     if (rows_.empty() ||
00931         rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false;
00932     return true;
00933   }
00934 
00935   void MergeFragments() {
00936     for (int i = 0; i < rows_.size(); i++) rows_[i].MergeFragments();
00937   }
00938 
00939   void FinalizeLargeChars() {
00940     for (int i = 0; i < rows_.size(); i++) rows_[i].FinalizeLargeChars();
00941   }
00942 
00943   bool Pass2Analyze() {
00944     bool changed = false;
00945     for (int i = 0; i < rows_.size(); i++) {
00946       if (rows_[i].Pass2Analyze()) {
00947         changed = true;
00948       }
00949     }
00950     return changed;
00951   }
00952 
00953   void OutputEstimations() {
00954     for (int i = 0; i < rows_.size(); i++) rows_[i].OutputEstimations();
00955     // Don't we need page-level estimation of gaps/spaces?
00956   }
00957 
00958   void DebugOutputResult() {
00959     tprintf("FPAnalyzer: final result\n");
00960     for (int i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i);
00961   }
00962 
00963   int num_rows() {
00964     return rows_.size();
00965   }
00966 
00967   // Returns the upper limit for pass2 loop iteration.
00968   int max_iteration() {
00969     // We're fixing at least one character per iteration. So basically
00970     // we shouldn't require more than max_chars_per_row_ iterations.
00971     return max_chars_per_row_ + 100;
00972   }
00973 
00974  private:
00975   ICOORD page_tr_;
00976   GenericVector<FPRow> rows_;
00977   int num_tall_rows_;
00978   int num_bad_rows_;
00979   int num_empty_rows_;
00980   int max_chars_per_row_;
00981 };
00982 
00983 void FPAnalyzer::Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
00984   page_tr_ = page_tr;
00985 
00986   TO_BLOCK_IT block_it;
00987   block_it.set_to_list (port_blocks);
00988 
00989   for (block_it.mark_cycle_pt(); !block_it.cycled_list();
00990        block_it.forward()) {
00991     TO_BLOCK *block = block_it.data();
00992     if (!block->get_rows()->empty()) {
00993       ASSERT_HOST(block->xheight > 0);
00994       find_repeated_chars(block, FALSE);
00995     }
00996   }
00997 
00998   num_empty_rows_ = 0;
00999   max_chars_per_row_ = 0;
01000   for (block_it.mark_cycle_pt(); !block_it.cycled_list();
01001        block_it.forward()) {
01002     TO_ROW_IT row_it = block_it.data()->get_rows();
01003     for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
01004       FPRow row;
01005       row.Init(row_it.data());
01006       rows_.push_back(row);
01007       int num_chars = rows_.back().num_chars();
01008       if (num_chars <= 1) num_empty_rows_++;
01009       if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars;
01010     }
01011   }
01012 }
01013 
01014 void FPAnalyzer::EstimatePitch(bool pass1) {
01015   LocalCorrelation pitch_height_stats;
01016 
01017   num_tall_rows_ = 0;
01018   num_bad_rows_ = 0;
01019   pitch_height_stats.Clear();
01020   for (int i = 0; i < rows_.size(); i++) {
01021     rows_[i].EstimatePitch(pass1);
01022     if (rows_[i].good_pitches()) {
01023       pitch_height_stats.Add(rows_[i].height() + rows_[i].gap(),
01024                              rows_[i].pitch(), rows_[i].good_pitches());
01025       if (rows_[i].height_pitch_ratio() > 1.1) num_tall_rows_++;
01026     } else {
01027       num_bad_rows_++;
01028     }
01029   }
01030 
01031   pitch_height_stats.Finish();
01032   for (int i = 0; i < rows_.size(); i++) {
01033     if (rows_[i].good_pitches() >= 5) {
01034       // We have enough evidences. Just use the pitch estimation
01035       // from this row.
01036       rows_[i].set_estimated_pitch(rows_[i].pitch());
01037     } else if (rows_[i].num_chars() > 1) {
01038       float estimated_pitch =
01039           pitch_height_stats.EstimateYFor(rows_[i].height() + rows_[i].gap(),
01040                                           0.1);
01041       // CJK characters are more likely to be fragmented than poorly
01042       // chopped. So trust the page-level estimation of character
01043       // pitch only if it's larger than row-level estimation or
01044       // row-level estimation is too large (2x bigger than row height).
01045       if (estimated_pitch > rows_[i].pitch() ||
01046           rows_[i].pitch() > rows_[i].height() * 2.0) {
01047         rows_[i].set_estimated_pitch(estimated_pitch);
01048       } else {
01049         rows_[i].set_estimated_pitch(rows_[i].pitch());
01050       }
01051     }
01052   }
01053 }
01054 
01055 }  // namespace
01056 
01057 void compute_fixed_pitch_cjk(ICOORD page_tr,
01058                              TO_BLOCK_LIST *port_blocks) {
01059   FPAnalyzer analyzer;
01060   analyzer.Init(page_tr, port_blocks);
01061   if (analyzer.num_rows() == 0) return;
01062 
01063   analyzer.Pass1Analyze();
01064   analyzer.EstimatePitch(true);
01065 
01066   // Perform pass1 analysis again with the initial estimation of row
01067   // pitches, for better estimation.
01068   analyzer.Pass1Analyze();
01069   analyzer.EstimatePitch(true);
01070 
01071   // Early exit if the page doesn't seem to contain fixed pitch rows.
01072   if (!analyzer.maybe_fixed_pitch()) {
01073     if (textord_debug_pitch_test) {
01074       tprintf("Page doesn't seem to contain fixed pitch rows\n");
01075     }
01076     return;
01077   }
01078 
01079   int iteration = 0;
01080   do {
01081     analyzer.MergeFragments();
01082     analyzer.FinalizeLargeChars();
01083     analyzer.EstimatePitch(false);
01084     iteration++;
01085   } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
01086 
01087   if (textord_debug_pitch_test) {
01088     tprintf("compute_fixed_pitch_cjk finished after %d iteration (limit=%d)\n",
01089             iteration, analyzer.max_iteration());
01090   }
01091 
01092   analyzer.OutputEstimations();
01093   if (textord_debug_pitch_test) analyzer.DebugOutputResult();
01094 }