Tesseract
3.02
|
00001 00002 // File: cjkpitch.cpp 00003 // Description: Code to determine fixed pitchness and the pitch if fixed, 00004 // for CJK text. 00005 // Copyright 2011 Google Inc. All Rights Reserved. 00006 // Author: takenaka@google.com (Hiroshi Takenaka) 00007 // Created: Mon Jun 27 12:48:35 JST 2011 00008 // 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 #include "cjkpitch.h" 00021 #include "genericvector.h" 00022 #include "ndminx.h" 00023 #include "topitch.h" 00024 #include "tovars.h" 00025 00026 BOOL_VAR(textord_space_size_is_variable, FALSE, 00027 "If true, word delimiter spaces are assumed to have " 00028 "variable width, even though characters have fixed pitch."); 00029 00030 namespace { 00031 00032 // Allow +/-10% error for character pitch / body size. 00033 static const float kFPTolerance = 0.1; 00034 00035 // Minimum ratio of "good" character pitch for a row to be considered 00036 // to be fixed-pitch. 00037 static const float kFixedPitchThreshold = 0.35; 00038 00039 // rank statistics for a small collection of float values. 00040 class SimpleStats { 00041 public: 00042 SimpleStats(): finalized_(false), values_() { } 00043 ~SimpleStats() { } 00044 00045 void Clear() { 00046 values_.clear(); 00047 finalized_ = false; 00048 } 00049 00050 void Add(float value) { 00051 values_.push_back(value); 00052 finalized_ = false; 00053 } 00054 00055 void Finish() { 00056 values_.sort(float_compare); 00057 finalized_ = true; 00058 } 00059 00060 float ile(double frac) { 00061 if (!finalized_) Finish(); 00062 if (values_.empty()) return 0.0; 00063 if (frac >= 1.0) return values_.back(); 00064 if (frac <= 0.0 || values_.size() == 1) return values_[0]; 00065 int index = static_cast<int>((values_.size() - 1) * frac); 00066 float reminder = (values_.size() - 1) * frac - index; 00067 00068 return values_[index] * (1.0 - reminder) + 00069 values_[index + 1] * reminder; 00070 } 00071 00072 float median() { 00073 return ile(0.5); 00074 } 00075 00076 float maximum() { 00077 if (!finalized_) Finish(); 00078 if (values_.empty()) return 0.0; 00079 return values_.back(); 00080 } 00081 00082 float minimum() { 00083 if (!finalized_) Finish(); 00084 if (values_.empty()) return 0.0; 00085 return values_[0]; 00086 } 00087 00088 int size() const { 00089 return values_.size(); 00090 } 00091 00092 private: 00093 static int float_compare(const void* a, const void* b) { 00094 const float* f_a = reinterpret_cast<const float*>(a); 00095 const float* f_b = reinterpret_cast<const float*>(b); 00096 return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0); 00097 } 00098 00099 bool finalized_; 00100 GenericVector<float> values_; 00101 }; 00102 00103 // statistics for a small collection of float pairs (x, y). 00104 // EstimateYFor(x, r) returns the estimated y at x, based on 00105 // existing samples between x*(1-r) ~ x*(1+r). 00106 class LocalCorrelation { 00107 public: 00108 struct float_pair { 00109 float x, y; 00110 int vote; 00111 }; 00112 00113 LocalCorrelation(): finalized_(false) { } 00114 ~LocalCorrelation() { } 00115 00116 void Finish() { 00117 values_.sort(float_pair_compare); 00118 finalized_ = true; 00119 } 00120 00121 void Clear() { 00122 finalized_ = false; 00123 } 00124 00125 void Add(float x, float y, int v) { 00126 struct float_pair value; 00127 value.x = x; 00128 value.y = y; 00129 value.vote = v; 00130 values_.push_back(value); 00131 finalized_ = false; 00132 } 00133 00134 float EstimateYFor(float x, float r) { 00135 ASSERT_HOST(finalized_); 00136 int start = 0, end = values_.size(); 00137 // Because the number of samples (used_) is assumed to be small, 00138 // just use linear search to find values within the range. 00139 while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++; 00140 while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--; 00141 00142 // Fall back to the global average if there are no data within r 00143 // of x. 00144 if (start >= end) { 00145 start = 0; 00146 end = values_.size(); 00147 } 00148 00149 // Compute weighted average of the values. 00150 float rc = 0; 00151 int vote = 0; 00152 for (int i = start; i < end; i++) { 00153 rc += values_[i].vote * x * values_[i].y / values_[i].x; 00154 vote += values_[i].vote; 00155 } 00156 00157 return rc / vote; 00158 } 00159 00160 private: 00161 static int float_pair_compare(const void* a, const void* b) { 00162 const float_pair* f_a = reinterpret_cast<const float_pair*>(a); 00163 const float_pair* f_b = reinterpret_cast<const float_pair*>(b); 00164 return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0); 00165 } 00166 00167 bool finalized_; 00168 GenericVector<struct float_pair> values_; 00169 }; 00170 00171 // Class to represent a character on a fixed pitch row. A FPChar may 00172 // consist of multiple blobs (BLOBNBOX's). 00173 class FPChar { 00174 public: 00175 enum Alignment { 00176 ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD 00177 }; 00178 00179 FPChar(): box_(), real_body_(), 00180 from_(NULL), to_(NULL), num_blobs_(0), max_gap_(0), 00181 final_(false), alignment_(ALIGN_UNKNOWN), 00182 merge_to_prev_(false), delete_flag_(false) { 00183 } 00184 00185 // Initialize from blob. 00186 void Init(BLOBNBOX *blob) { 00187 box_ = blob->bounding_box(); 00188 real_body_ = box_; 00189 from_ = to_ = blob; 00190 num_blobs_ = 1; 00191 } 00192 00193 // Merge this character with "next". The "next" character should 00194 // consist of succeeding blobs on the same row. 00195 void Merge(const FPChar &next) { 00196 int gap = real_body_.x_gap(next.real_body_); 00197 if (gap > max_gap_) max_gap_ = gap; 00198 00199 box_ += next.box_; 00200 real_body_ += next.real_body_; 00201 to_ = next.to_; 00202 num_blobs_ += next.num_blobs_; 00203 } 00204 00205 // Accessors. 00206 const TBOX &box() const { return box_; } 00207 void set_box(const TBOX &box) { 00208 box_ = box; 00209 } 00210 const TBOX &real_body() const { return real_body_; } 00211 00212 bool is_final() const { return final_; } 00213 void set_final(bool flag) { 00214 final_ = flag; 00215 } 00216 00217 const Alignment& alignment() const { 00218 return alignment_; 00219 } 00220 void set_alignment(Alignment alignment) { 00221 alignment_ = alignment; 00222 } 00223 00224 bool merge_to_prev() const { 00225 return merge_to_prev_; 00226 } 00227 void set_merge_to_prev(bool flag) { 00228 merge_to_prev_ = flag; 00229 } 00230 00231 bool delete_flag() const { 00232 return delete_flag_; 00233 } 00234 void set_delete_flag(bool flag) { 00235 delete_flag_ = flag; 00236 } 00237 00238 int max_gap() const { 00239 return max_gap_; 00240 } 00241 00242 int num_blobs() const { 00243 return num_blobs_; 00244 } 00245 00246 private: 00247 TBOX box_; // Rectangle region considered to be occupied by this 00248 // character. It could be bigger than the bounding box. 00249 TBOX real_body_; // Real bounding box of this character. 00250 BLOBNBOX *from_; // The first blob of this character. 00251 BLOBNBOX *to_; // The last blob of this character. 00252 int num_blobs_; // Number of blobs that belong to this character. 00253 int max_gap_; // Maximum x gap between the blobs. 00254 00255 bool final_; // True if alignment/fragmentation decision for this 00256 // character is finalized. 00257 00258 Alignment alignment_; // Alignment status. 00259 bool merge_to_prev_; // True if this is a fragmented blob that 00260 // needs to be merged to the previous 00261 // character. 00262 00263 int delete_flag_; // True if this character is merged to another 00264 // one and needs to be deleted. 00265 }; 00266 00267 // Class to represent a fixed pitch row, as a linear collection of 00268 // FPChar's. 00269 class FPRow { 00270 public: 00271 FPRow() : pitch_(0.0f), estimated_pitch_(0.0f), 00272 all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), 00273 heights_(), characters_(), real_row_(NULL) { 00274 } 00275 00276 ~FPRow() { } 00277 00278 // Initialize from TD_ROW. 00279 void Init(TO_ROW *row); 00280 00281 // Estimate character pitch of this row, based on current alignment 00282 // status of underlying FPChar's. The argument pass1 can be set to 00283 // true if the function is called after Pass1Analyze(), to eliminate 00284 // some redundant computation. 00285 void EstimatePitch(bool pass1); 00286 00287 // Check each character if it has good character pitches between its 00288 // predecessor and its successor and set its alignment status. If 00289 // we already calculated the estimated pitch for this row, the value 00290 // is used. If we didn't, a character is considered to be good, if 00291 // the pitches between its predecessor and its successor are almost 00292 // equal. 00293 void Pass1Analyze(); 00294 00295 // Find characters that fit nicely into one imaginary body next to a 00296 // character which is already finalized. Then mark them as character 00297 // fragments. 00298 bool Pass2Analyze(); 00299 00300 // Merge FPChars marked as character fragments into one. 00301 void MergeFragments(); 00302 00303 // Finalize characters that are already large enough and cannot be 00304 // merged with others any more. 00305 void FinalizeLargeChars(); 00306 00307 // Ouput pitch estimation results to attributes of TD_ROW. 00308 void OutputEstimations(); 00309 00310 void DebugOutputResult(int row_index); 00311 00312 int good_pitches() { 00313 return good_pitches_.size(); 00314 } 00315 00316 int good_gaps() { 00317 return good_gaps_.size(); 00318 } 00319 00320 float pitch() { 00321 return pitch_; 00322 } 00323 00324 float estimated_pitch() { 00325 return estimated_pitch_; 00326 } 00327 00328 void set_estimated_pitch(float v) { 00329 estimated_pitch_ = v; 00330 } 00331 00332 float height() { 00333 return height_; 00334 } 00335 00336 float height_pitch_ratio() { 00337 if (good_pitches_.size() < 2) return -1.0; 00338 return height_ / good_pitches_.median(); 00339 } 00340 00341 float gap() { 00342 return gap_; 00343 } 00344 00345 int num_chars() { 00346 return characters_.size(); 00347 } 00348 FPChar *character(int i) { 00349 return &characters_[i]; 00350 } 00351 00352 const TBOX &box(int i) { 00353 return characters_[i].box(); 00354 } 00355 00356 const TBOX &real_body(int i) { 00357 return characters_[i].real_body(); 00358 } 00359 00360 bool is_box_modified(int i) { 00361 return !(characters_[i].box() == characters_[i].real_body()); 00362 } 00363 00364 float center_x(int i) { 00365 return (characters_[i].box().left() + characters_[i].box().right()) / 2.0; 00366 } 00367 00368 bool is_final(int i) { 00369 return characters_[i].is_final(); 00370 } 00371 00372 void finalize(int i) { 00373 characters_[i].set_final(true); 00374 } 00375 00376 bool is_good(int i) { 00377 return characters_[i].alignment() == FPChar::ALIGN_GOOD; 00378 } 00379 00380 bool is_bad(int i) { 00381 return characters_[i].alignment() == FPChar::ALIGN_BAD; 00382 } 00383 00384 bool is_unknown(int i) { 00385 return characters_[i].alignment() == FPChar::ALIGN_UNKNOWN; 00386 } 00387 00388 void mark_good(int i) { 00389 characters_[i].set_alignment(FPChar::ALIGN_GOOD); 00390 } 00391 00392 void mark_bad(int i) { 00393 characters_[i].set_alignment(FPChar::ALIGN_BAD); 00394 } 00395 00396 void clear_alignment(int i) { 00397 characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN); 00398 } 00399 00400 private: 00401 static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) { 00402 if (MIN(box1.width(), box2.width()) == 0) return 0.0; 00403 return -box1.x_gap(box2) / (float)MIN(box1.width(), box2.width()); 00404 } 00405 00406 static bool mostly_overlap(const TBOX& box1, const TBOX& box2) { 00407 return x_overlap_fraction(box1, box2) > 0.9; 00408 } 00409 00410 static bool significant_overlap(const TBOX& box1, const TBOX& box2) { 00411 if (MIN(box1.width(), box2.width()) == 0) return false; 00412 int overlap = -box1.x_gap(box2); 00413 return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1; 00414 } 00415 00416 static float box_pitch(const TBOX& ref, const TBOX& box) { 00417 return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0; 00418 } 00419 00420 // Check if two neighboring characters satisfy the fixed pitch model. 00421 static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) { 00422 // Character box shouldn't exceed pitch. 00423 if (box1.width() >= pitch * (1.0 + kFPTolerance) || 00424 box2.width() >= pitch * (1.0 + kFPTolerance) || 00425 box1.height() >= pitch * (1.0 + kFPTolerance) || 00426 box2.height() >= pitch * (1.0 + kFPTolerance)) return false; 00427 00428 const float real_pitch = box_pitch(box1, box2); 00429 if (abs(real_pitch - pitch) < pitch * kFPTolerance) return true; 00430 00431 if (textord_space_size_is_variable) { 00432 // Hangul characters usually have fixed pitch, but words are 00433 // delimited by space which can be narrower than characters. 00434 if (real_pitch > pitch && real_pitch < pitch * 2.0 && 00435 real_pitch - box1.x_gap(box2) < pitch) { 00436 return true; 00437 } 00438 } 00439 return false; 00440 } 00441 00442 static bool is_interesting_blob(const BLOBNBOX *blob) { 00443 return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER; 00444 } 00445 00446 // Cleanup chars that are already merged to others. 00447 void DeleteChars() { 00448 int index = 0; 00449 for (int i = 0; i < characters_.size(); ++i) { 00450 if (!characters_[i].delete_flag()) { 00451 if (index != i) characters_[index] = characters_[i]; 00452 index++; 00453 } 00454 } 00455 characters_.truncate(index); 00456 } 00457 00458 float pitch_; // Character pitch. 00459 float estimated_pitch_; // equal to pitch_ if pitch_ is considered 00460 // to be good enough. 00461 float height_; // Character height. 00462 float gap_; // Minimum gap between characters. 00463 00464 // Pitches between any two successive characters. 00465 SimpleStats all_pitches_; 00466 // Gaps between any two successive characters. 00467 SimpleStats all_gaps_; 00468 // Pitches between any two successive characters that are consistent 00469 // with the fixed pitch model. 00470 SimpleStats good_pitches_; 00471 // Gaps between any two successive characters that are consistent 00472 // with the fixed pitch model. 00473 SimpleStats good_gaps_; 00474 00475 SimpleStats heights_; 00476 00477 GenericVector<FPChar> characters_; 00478 TO_ROW *real_row_; // Underlying TD_ROW for this row. 00479 }; 00480 00481 void FPRow::Init(TO_ROW *row) { 00482 ASSERT_HOST(row != NULL); 00483 ASSERT_HOST(row->xheight > 0); 00484 real_row_ = row; 00485 real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision. 00486 00487 BLOBNBOX_IT blob_it = row->blob_list(); 00488 // Initialize characters_ and compute the initial estimation of 00489 // character height. 00490 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00491 if (is_interesting_blob(blob_it.data())) { 00492 FPChar fp_char; 00493 fp_char.Init(blob_it.data()); 00494 // Merge unconditionally if two blobs overlap. 00495 if (!characters_.empty() && 00496 significant_overlap(fp_char.box(), characters_.back().box())) { 00497 characters_.back().Merge(fp_char); 00498 } else { 00499 characters_.push_back(fp_char); 00500 } 00501 TBOX bound = blob_it.data()->bounding_box(); 00502 if (bound.height() * 3.0 > bound.width()) { 00503 heights_.Add(bound.height()); 00504 } 00505 } 00506 } 00507 heights_.Finish(); 00508 height_ = heights_.ile(0.875); 00509 } 00510 00511 void FPRow::OutputEstimations() { 00512 if (good_pitches_.size() == 0) { 00513 pitch_ = 0.0f; 00514 real_row_->pitch_decision = PITCH_CORR_PROP; 00515 return; 00516 } 00517 00518 pitch_ = good_pitches_.median(); 00519 real_row_->fixed_pitch = pitch_; 00520 // good_gaps_.ile(0.125) can be large if most characters on the row 00521 // are skinny. Use pitch_ - height_ instead if it's smaller, but 00522 // positive. 00523 real_row_->kern_size = real_row_->pr_nonsp = 00524 MIN(good_gaps_.ile(0.125), MAX(pitch_ - height_, 0)); 00525 real_row_->body_size = pitch_ - real_row_->kern_size; 00526 00527 if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) { 00528 // If more than half of the characters of a line don't fit to the 00529 // fixed pitch model, consider the line to be propotional. 50% 00530 // seems to be a good threshold in practice as well. 00531 // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in 00532 // real_row_ as a partial estimation result and try to use them in the 00533 // normalization process. 00534 real_row_->pitch_decision = PITCH_CORR_PROP; 00535 return; 00536 } else if (good_pitches_.size() > all_pitches_.size() * 0.75) { 00537 real_row_->pitch_decision = PITCH_DEF_FIXED; 00538 } else { 00539 real_row_->pitch_decision = PITCH_CORR_FIXED; 00540 } 00541 00542 real_row_->space_size = real_row_->pr_space = pitch_; 00543 // Set min_space to 50% of character pitch so that we can break CJK 00544 // text at a half-width space after punctuation. 00545 real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5; 00546 00547 // Don't consider a quarter space as a real space, because it's used 00548 // for line justification in traditional Japanese books. 00549 real_row_->max_nonspace = MAX(pitch_ * 0.25 + good_gaps_.minimum(), 00550 (double)good_gaps_.ile(0.875)); 00551 00552 int space_threshold = 00553 MIN((real_row_->max_nonspace + real_row_->min_space) / 2, 00554 real_row_->xheight); 00555 00556 // Make max_nonspace larger than any intra-character gap so that 00557 // make_prop_words() won't break a row at the middle of a character. 00558 for (int i = 0; i < num_chars(); ++i) { 00559 if (characters_[i].max_gap() > real_row_->max_nonspace) { 00560 real_row_->max_nonspace = characters_[i].max_gap(); 00561 } 00562 } 00563 real_row_->space_threshold = 00564 MIN((real_row_->max_nonspace + real_row_->min_space) / 2, 00565 real_row_->xheight); 00566 real_row_->used_dm_model = false; 00567 00568 // Setup char_cells. 00569 ICOORDELT_IT cell_it = &real_row_->char_cells; 00570 ICOORDELT *cell = new ICOORDELT(real_body(0).left(), 0); 00571 cell_it.add_after_then_move(cell); 00572 00573 int right = real_body(0).right(); 00574 for (int i = 1; i < num_chars(); ++i) { 00575 // Put a word break if gap between two characters is bigger than 00576 // space_threshold. Don't break if none of two characters 00577 // couldn't be "finalized", because maybe they need to be merged 00578 // to one character. 00579 if ((is_final(i - 1) || is_final(i)) && 00580 real_body(i - 1).x_gap(real_body(i)) > space_threshold) { 00581 cell = new ICOORDELT(right + 1, 0); 00582 cell_it.add_after_then_move(cell); 00583 while (right + pitch_ < box(i).left()) { 00584 right += pitch_; 00585 cell = new ICOORDELT(right + 1, 0); 00586 cell_it.add_after_then_move(cell); 00587 } 00588 right = box(i).left(); 00589 } 00590 cell = new ICOORDELT((right + real_body(i).left()) / 2, 0); 00591 cell_it.add_after_then_move(cell); 00592 right = real_body(i).right(); 00593 } 00594 00595 cell = new ICOORDELT(right + 1, 0); 00596 cell_it.add_after_then_move(cell); 00597 00598 // TODO(takenaka): add code to store alignment/fragmentation 00599 // information to blobs so that it can be reused later, e.g. in 00600 // recognition phase. 00601 } 00602 00603 void FPRow::EstimatePitch(bool pass1) { 00604 good_pitches_.Clear(); 00605 all_pitches_.Clear(); 00606 good_gaps_.Clear(); 00607 all_gaps_.Clear(); 00608 heights_.Clear(); 00609 if (num_chars() == 0) return; 00610 00611 inT32 cx0, cx1; 00612 bool prev_was_good = is_good(0); 00613 cx0 = center_x(0); 00614 00615 heights_.Add(box(0).height()); 00616 for (int i = 1; i < num_chars(); i++) { 00617 cx1 = center_x(i); 00618 inT32 pitch = cx1 - cx0; 00619 inT32 gap = MAX(0, real_body(i - 1).x_gap(real_body(i))); 00620 00621 heights_.Add(box(i).height()); 00622 // Ignore if the pitch is too close. But don't ignore wide pitch 00623 // may be the result of large tracking. 00624 if (pitch > height_ * 0.5) { 00625 all_pitches_.Add(pitch); 00626 all_gaps_.Add(gap); 00627 if (is_good(i)) { 00628 // In pass1 (after Pass1Analyze()), all characters marked as 00629 // "good" have a good consistent pitch with their previous 00630 // characters. However, it's not true in pass2 and a good 00631 // character may have a good pitch only between its successor. 00632 // So we collect only pitch values between two good 00633 // characters. and within tolerance in pass2. 00634 if (pass1 || 00635 (prev_was_good && 00636 abs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) { 00637 good_pitches_.Add(pitch); 00638 if (!is_box_modified(i - 1) && !is_box_modified(i)) { 00639 good_gaps_.Add(gap); 00640 } 00641 } 00642 prev_was_good = true; 00643 } else { 00644 prev_was_good = false; 00645 } 00646 } 00647 cx0 = cx1; 00648 } 00649 00650 good_pitches_.Finish(); 00651 all_pitches_.Finish(); 00652 good_gaps_.Finish(); 00653 all_gaps_.Finish(); 00654 heights_.Finish(); 00655 00656 height_ = heights_.ile(0.875); 00657 if (all_pitches_.size() == 0) { 00658 pitch_ = 0.0f; 00659 gap_ = 0.0f; 00660 } else if (good_pitches_.size() < 2) { 00661 // We don't have enough data to estimate the pitch of this row yet. 00662 // Use median of all pitches as the initial guess. 00663 pitch_ = all_pitches_.median(); 00664 ASSERT_HOST(pitch_ > 0.0f); 00665 gap_ = all_gaps_.ile(0.125); 00666 } else { 00667 pitch_ = good_pitches_.median(); 00668 ASSERT_HOST(pitch_ > 0.0f); 00669 gap_ = good_gaps_.ile(0.125); 00670 } 00671 } 00672 00673 void FPRow::DebugOutputResult(int row_index) { 00674 if (num_chars() > 0) { 00675 tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, " 00676 "space_size=%f, space_threshold=%d, xheight=%f\n", 00677 row_index, (int)(real_row_->pitch_decision), 00678 real_row_->fixed_pitch, real_row_->max_nonspace, 00679 real_row_->space_size, real_row_->space_threshold, 00680 real_row_->xheight); 00681 00682 for (int i = 0; i < num_chars(); i++) { 00683 tprintf("Char %d: is_final=%d is_good=%d num_blobs=%d: ", 00684 i, is_final(i), is_good(i), character(i)->num_blobs()); 00685 box(i).print(); 00686 } 00687 } 00688 } 00689 00690 void FPRow::Pass1Analyze() { 00691 if (num_chars() < 2) return; 00692 00693 if (estimated_pitch_ > 0.0f) { 00694 for (int i = 2; i < num_chars(); i++) { 00695 if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) && 00696 is_good_pitch(estimated_pitch_, box(i - 1), box(i))) { 00697 mark_good(i - 1); 00698 } 00699 } 00700 } else { 00701 for (int i = 2; i < num_chars(); i++) { 00702 if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) { 00703 mark_good(i - 1); 00704 } 00705 } 00706 } 00707 character(0)->set_alignment(character(1)->alignment()); 00708 character(num_chars() - 1)->set_alignment( 00709 character(num_chars() - 2)->alignment()); 00710 } 00711 00712 bool FPRow::Pass2Analyze() { 00713 bool changed = false; 00714 if (num_chars() <= 1 || estimated_pitch_ == 0.0f) { 00715 return false; 00716 } 00717 for (int i = 0; i < num_chars(); i++) { 00718 if (is_final(i)) continue; 00719 00720 FPChar::Alignment alignment = character(i)->alignment(); 00721 bool intersecting = false; 00722 bool not_intersecting = false; 00723 00724 if (i < num_chars() - 1 && is_final(i + 1)) { 00725 // Next character is already finalized. Estimate the imaginary 00726 // body including this character based on the character. Skip 00727 // whitespace if necessary. 00728 bool skipped_whitespaces = false; 00729 float c1 = center_x(i + 1) - 1.5 * estimated_pitch_; 00730 while (c1 > box(i).right()) { 00731 skipped_whitespaces = true; 00732 c1 -= estimated_pitch_; 00733 } 00734 TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top()); 00735 00736 // Collect all characters that mostly fit in the region. 00737 // Also, their union height shouldn't be too big. 00738 int j = i; 00739 TBOX merged; 00740 while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) && 00741 merged.bounding_union(box(j)).height() < 00742 estimated_pitch_ * (1 + kFPTolerance)) { 00743 merged += box(j); 00744 j--; 00745 } 00746 00747 if (j >= 0 && significant_overlap(ibody, box(j))) { 00748 // character(j) lies on the character boundary and doesn't fit 00749 // well into the imaginary body. 00750 if (!is_final(j)) intersecting = true; 00751 } else { 00752 not_intersecting = true; 00753 if (i - j > 0) { 00754 // Merge character(j+1) ... character(i) because they fit 00755 // into the body nicely. 00756 if (i - j == 1) { 00757 // Only one char in the imaginary body. 00758 if (!skipped_whitespaces) mark_good(i); 00759 // set ibody as bounding box of this character to get 00760 // better pitch analysis result for halfwidth glyphs 00761 // followed by a halfwidth space. 00762 if (box(i).width() <= estimated_pitch_ * 0.5) { 00763 ibody += box(i); 00764 character(i)->set_box(ibody); 00765 } 00766 character(i)->set_merge_to_prev(false); 00767 finalize(i); 00768 } else { 00769 for (int k = i; k > j + 1; k--) { 00770 character(k)->set_merge_to_prev(true); 00771 } 00772 } 00773 } 00774 } 00775 } 00776 if (i > 0 && is_final(i - 1)) { 00777 // Now we repeat everything from the opposite side. Previous 00778 // character is already finalized. Estimate the imaginary body 00779 // including this character based on the character. 00780 bool skipped_whitespaces = false; 00781 float c1 = center_x(i - 1) + 1.5 * estimated_pitch_; 00782 while (c1 < box(i).left()) { 00783 skipped_whitespaces = true; 00784 c1 += estimated_pitch_; 00785 } 00786 TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top()); 00787 00788 int j = i; 00789 TBOX merged; 00790 while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) && 00791 merged.bounding_union(box(j)).height() < 00792 estimated_pitch_ * (1 + kFPTolerance)) { 00793 merged += box(j); 00794 j++; 00795 } 00796 00797 if (j < num_chars() && significant_overlap(ibody, box(j))) { 00798 if (!is_final(j)) intersecting = true; 00799 } else { 00800 not_intersecting = true; 00801 if (j - i > 0) { 00802 if (j - i == 1) { 00803 if (!skipped_whitespaces) mark_good(i); 00804 if (box(i).width() <= estimated_pitch_ * 0.5) { 00805 ibody += box(i); 00806 character(i)->set_box(ibody); 00807 } 00808 character(i)->set_merge_to_prev(false); 00809 finalize(i); 00810 } else { 00811 for (int k = i + 1; k < j; k++) { 00812 character(k)->set_merge_to_prev(true); 00813 } 00814 } 00815 } 00816 } 00817 } 00818 00819 // This character doesn't fit well into the estimated imaginary 00820 // bodies. Mark it as bad. 00821 if (intersecting && !not_intersecting) mark_bad(i); 00822 if (character(i)->alignment() != alignment || 00823 character(i)->merge_to_prev()) { 00824 changed = true; 00825 } 00826 } 00827 00828 return changed; 00829 } 00830 00831 void FPRow::MergeFragments() { 00832 int last_char = 0; 00833 00834 for (int j = 0; j < num_chars(); ++j) { 00835 if (character(j)->merge_to_prev()) { 00836 character(last_char)->Merge(*character(j)); 00837 character(j)->set_delete_flag(true); 00838 clear_alignment(last_char); 00839 character(j-1)->set_merge_to_prev(false); 00840 } else { 00841 last_char = j; 00842 } 00843 } 00844 DeleteChars(); 00845 } 00846 00847 void FPRow::FinalizeLargeChars() { 00848 float row_pitch = estimated_pitch(); 00849 for (int i = 0; i < num_chars(); i++) { 00850 if (is_final(i)) continue; 00851 00852 // Finalize if both neighbors are finalized. We have no other choice. 00853 if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) { 00854 finalize(i); 00855 continue; 00856 } 00857 00858 float cx = center_x(i); 00859 TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1); 00860 if (i > 0) { 00861 // The preceding character significantly intersects with the 00862 // imaginary body of this character. Let Pass2Analyze() handle 00863 // this case. 00864 if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue; 00865 if (!is_final(i - 1)) { 00866 TBOX merged = box(i); 00867 merged += box(i - 1); 00868 if (merged.width() < row_pitch) continue; 00869 // This character cannot be finalized yet because it can be 00870 // merged with the previous one. Again, let Pass2Analyze() 00871 // handle this case. 00872 } 00873 } 00874 if (i < num_chars() - 1) { 00875 if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue; 00876 if (!is_final(i + 1)) { 00877 TBOX merged = box(i); 00878 merged += box(i + 1); 00879 if (merged.width() < row_pitch) continue; 00880 } 00881 } 00882 finalize(i); 00883 } 00884 00885 // Update alignment decision. We only consider finalized characters 00886 // in pass2. E.g. if a finalized character C has another finalized 00887 // character L on its left and a not-finalized character R on its 00888 // right, we mark C as good if the pitch between C and L is good, 00889 // regardless of the pitch between C and R. 00890 for (int i = 0; i < num_chars(); i++) { 00891 if (!is_final(i)) continue; 00892 bool good_pitch = false; 00893 bool bad_pitch = false; 00894 if (i > 0 && is_final(i - 1)) { 00895 if (is_good_pitch(row_pitch, box(i - 1), box(i))) { 00896 good_pitch = true; 00897 } else { 00898 bad_pitch = true; 00899 } 00900 } 00901 if (i < num_chars() - 1 && is_final(i + 1)) { 00902 if (is_good_pitch(row_pitch, box(i), box(i + 1))) { 00903 good_pitch = true; 00904 } else { 00905 bad_pitch = true; 00906 } 00907 } 00908 if (good_pitch && !bad_pitch) mark_good(i); 00909 else if (!good_pitch && bad_pitch) mark_bad(i); 00910 } 00911 } 00912 00913 class FPAnalyzer { 00914 public: 00915 FPAnalyzer(): page_tr_(), rows_() { } 00916 ~FPAnalyzer() { } 00917 00918 void Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks); 00919 00920 void Pass1Analyze() { 00921 for (int i = 0; i < rows_.size(); i++) rows_[i].Pass1Analyze(); 00922 } 00923 00924 // Estimate character pitch for each row. The argument pass1 can be 00925 // set to true if the function is called after Pass1Analyze(), to 00926 // eliminate some redundant computation. 00927 void EstimatePitch(bool pass1); 00928 00929 bool maybe_fixed_pitch() { 00930 if (rows_.empty() || 00931 rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false; 00932 return true; 00933 } 00934 00935 void MergeFragments() { 00936 for (int i = 0; i < rows_.size(); i++) rows_[i].MergeFragments(); 00937 } 00938 00939 void FinalizeLargeChars() { 00940 for (int i = 0; i < rows_.size(); i++) rows_[i].FinalizeLargeChars(); 00941 } 00942 00943 bool Pass2Analyze() { 00944 bool changed = false; 00945 for (int i = 0; i < rows_.size(); i++) { 00946 if (rows_[i].Pass2Analyze()) { 00947 changed = true; 00948 } 00949 } 00950 return changed; 00951 } 00952 00953 void OutputEstimations() { 00954 for (int i = 0; i < rows_.size(); i++) rows_[i].OutputEstimations(); 00955 // Don't we need page-level estimation of gaps/spaces? 00956 } 00957 00958 void DebugOutputResult() { 00959 tprintf("FPAnalyzer: final result\n"); 00960 for (int i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i); 00961 } 00962 00963 int num_rows() { 00964 return rows_.size(); 00965 } 00966 00967 // Returns the upper limit for pass2 loop iteration. 00968 int max_iteration() { 00969 // We're fixing at least one character per iteration. So basically 00970 // we shouldn't require more than max_chars_per_row_ iterations. 00971 return max_chars_per_row_ + 100; 00972 } 00973 00974 private: 00975 ICOORD page_tr_; 00976 GenericVector<FPRow> rows_; 00977 int num_tall_rows_; 00978 int num_bad_rows_; 00979 int num_empty_rows_; 00980 int max_chars_per_row_; 00981 }; 00982 00983 void FPAnalyzer::Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { 00984 page_tr_ = page_tr; 00985 00986 TO_BLOCK_IT block_it; 00987 block_it.set_to_list (port_blocks); 00988 00989 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); 00990 block_it.forward()) { 00991 TO_BLOCK *block = block_it.data(); 00992 if (!block->get_rows()->empty()) { 00993 ASSERT_HOST(block->xheight > 0); 00994 find_repeated_chars(block, FALSE); 00995 } 00996 } 00997 00998 num_empty_rows_ = 0; 00999 max_chars_per_row_ = 0; 01000 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); 01001 block_it.forward()) { 01002 TO_ROW_IT row_it = block_it.data()->get_rows(); 01003 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 01004 FPRow row; 01005 row.Init(row_it.data()); 01006 rows_.push_back(row); 01007 int num_chars = rows_.back().num_chars(); 01008 if (num_chars <= 1) num_empty_rows_++; 01009 if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars; 01010 } 01011 } 01012 } 01013 01014 void FPAnalyzer::EstimatePitch(bool pass1) { 01015 LocalCorrelation pitch_height_stats; 01016 01017 num_tall_rows_ = 0; 01018 num_bad_rows_ = 0; 01019 pitch_height_stats.Clear(); 01020 for (int i = 0; i < rows_.size(); i++) { 01021 rows_[i].EstimatePitch(pass1); 01022 if (rows_[i].good_pitches()) { 01023 pitch_height_stats.Add(rows_[i].height() + rows_[i].gap(), 01024 rows_[i].pitch(), rows_[i].good_pitches()); 01025 if (rows_[i].height_pitch_ratio() > 1.1) num_tall_rows_++; 01026 } else { 01027 num_bad_rows_++; 01028 } 01029 } 01030 01031 pitch_height_stats.Finish(); 01032 for (int i = 0; i < rows_.size(); i++) { 01033 if (rows_[i].good_pitches() >= 5) { 01034 // We have enough evidences. Just use the pitch estimation 01035 // from this row. 01036 rows_[i].set_estimated_pitch(rows_[i].pitch()); 01037 } else if (rows_[i].num_chars() > 1) { 01038 float estimated_pitch = 01039 pitch_height_stats.EstimateYFor(rows_[i].height() + rows_[i].gap(), 01040 0.1); 01041 // CJK characters are more likely to be fragmented than poorly 01042 // chopped. So trust the page-level estimation of character 01043 // pitch only if it's larger than row-level estimation or 01044 // row-level estimation is too large (2x bigger than row height). 01045 if (estimated_pitch > rows_[i].pitch() || 01046 rows_[i].pitch() > rows_[i].height() * 2.0) { 01047 rows_[i].set_estimated_pitch(estimated_pitch); 01048 } else { 01049 rows_[i].set_estimated_pitch(rows_[i].pitch()); 01050 } 01051 } 01052 } 01053 } 01054 01055 } // namespace 01056 01057 void compute_fixed_pitch_cjk(ICOORD page_tr, 01058 TO_BLOCK_LIST *port_blocks) { 01059 FPAnalyzer analyzer; 01060 analyzer.Init(page_tr, port_blocks); 01061 if (analyzer.num_rows() == 0) return; 01062 01063 analyzer.Pass1Analyze(); 01064 analyzer.EstimatePitch(true); 01065 01066 // Perform pass1 analysis again with the initial estimation of row 01067 // pitches, for better estimation. 01068 analyzer.Pass1Analyze(); 01069 analyzer.EstimatePitch(true); 01070 01071 // Early exit if the page doesn't seem to contain fixed pitch rows. 01072 if (!analyzer.maybe_fixed_pitch()) { 01073 if (textord_debug_pitch_test) { 01074 tprintf("Page doesn't seem to contain fixed pitch rows\n"); 01075 } 01076 return; 01077 } 01078 01079 int iteration = 0; 01080 do { 01081 analyzer.MergeFragments(); 01082 analyzer.FinalizeLargeChars(); 01083 analyzer.EstimatePitch(false); 01084 iteration++; 01085 } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration()); 01086 01087 if (textord_debug_pitch_test) { 01088 tprintf("compute_fixed_pitch_cjk finished after %d iteration (limit=%d)\n", 01089 iteration, analyzer.max_iteration()); 01090 } 01091 01092 analyzer.OutputEstimations(); 01093 if (textord_debug_pitch_test) analyzer.DebugOutputResult(); 01094 }