Tesseract  3.02
tesseract-ocr/wordrec/chop.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        chop.c  (Formerly chop.c)
00005  * Description:
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Fri Oct 16 14:37:00 1987
00008  * Modified:     Tue Jul 30 16:41:11 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Reusable Software Component
00012  *
00013  * (c) Copyright 1987, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 
00026 /*----------------------------------------------------------------------
00027               I n c l u d e s
00028 ----------------------------------------------------------------------*/
00029 
00030 #include "chop.h"
00031 #include "outlines.h"
00032 #include "olutil.h"
00033 #include "callcpp.h"
00034 #include "plotedges.h"
00035 #include "const.h"
00036 #include "wordrec.h"
00037 
00038 #include <math.h>
00039 
00040 // Include automatically generated configuration file if running autoconf.
00041 #ifdef HAVE_CONFIG_H
00042 #include "config_auto.h"
00043 #endif
00044 
00045 namespace tesseract {
00046 /*----------------------------------------------------------------------
00047               F u n c t i o n s
00048 ----------------------------------------------------------------------*/
00055 PRIORITY Wordrec::point_priority(EDGEPT *point) {
00056   return (PRIORITY)angle_change(point->prev, point, point->next);
00057 }
00058 
00059 
00065 void Wordrec::add_point_to_list(POINT_GROUP point_list, EDGEPT *point) {
00066   HEAPENTRY data;
00067 
00068   if (SizeOfHeap (point_list) < MAX_NUM_POINTS - 2) {
00069     data.Data = (char *) point;
00070     data.Key = point_priority (point);
00071     HeapStore(point_list, &data);
00072   }
00073 
00074 #ifndef GRAPHICS_DISABLED
00075   if (chop_debug > 2)
00076     mark_outline(point);
00077 #endif
00078 }
00079 
00080 
00087 int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
00088   VECTOR vector1;
00089   VECTOR vector2;
00090 
00091   int angle;
00092   float length;
00093 
00094   /* Compute angle */
00095   vector1.x = point2->pos.x - point1->pos.x;
00096   vector1.y = point2->pos.y - point1->pos.y;
00097   vector2.x = point3->pos.x - point2->pos.x;
00098   vector2.y = point3->pos.y - point2->pos.y;
00099   /* Use cross product */
00100   length = (float)sqrt((float)LENGTH(vector1) * LENGTH(vector2));
00101   if ((int) length == 0)
00102     return (0);
00103   angle = static_cast<int>(floor(asin(CROSS (vector1, vector2) /
00104                                       length) / PI * 180.0 + 0.5));
00105 
00106   /* Use dot product */
00107   if (SCALAR (vector1, vector2) < 0)
00108     angle = 180 - angle;
00109   /* Adjust angle */
00110   if (angle > 180)
00111     angle -= 360;
00112   if (angle <= -180)
00113     angle += 360;
00114   return (angle);
00115 }
00116 
00123 int Wordrec::is_little_chunk(EDGEPT *point1, EDGEPT *point2) {
00124   EDGEPT *p = point1;            /* Iterator */
00125   int counter = 0;
00126 
00127   do {
00128                                  /* Go from P1 to P2 */
00129     if (is_same_edgept (point2, p)) {
00130       if (is_small_area (point1, point2))
00131         return (TRUE);
00132       else
00133         break;
00134     }
00135     p = p->next;
00136   }
00137   while ((p != point1) && (counter++ < chop_min_outline_points));
00138   /* Go from P2 to P1 */
00139   p = point2;
00140   counter = 0;
00141   do {
00142     if (is_same_edgept (point1, p)) {
00143       return (is_small_area (point2, point1));
00144     }
00145     p = p->next;
00146   }
00147   while ((p != point2) && (counter++ < chop_min_outline_points));
00148 
00149   return (FALSE);
00150 }
00151 
00152 
00158 int Wordrec::is_small_area(EDGEPT *point1, EDGEPT *point2) {
00159   EDGEPT *p = point1->next;      /* Iterator */
00160   int area = 0;
00161   TPOINT origin;
00162 
00163   do {
00164                                  /* Go from P1 to P2 */
00165     origin.x = p->pos.x - point1->pos.x;
00166     origin.y = p->pos.y - point1->pos.y;
00167     area += CROSS (origin, p->vec);
00168     p = p->next;
00169   }
00170   while (!is_same_edgept (point2, p));
00171 
00172   return (area < chop_min_outline_area);
00173 }
00174 
00175 
00182 EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point,
00183                                   EDGEPT *vertical_point,
00184                                   int *best_dist) {
00185   EDGEPT *best_point = NULL;
00186   int this_distance;
00187   int found_better;
00188 
00189   do {
00190     found_better = FALSE;
00191 
00192     this_distance = edgept_dist (critical_point, vertical_point);
00193     if (this_distance <= *best_dist) {
00194 
00195       if (!(same_point (critical_point->pos, vertical_point->pos) ||
00196         same_point (critical_point->pos, vertical_point->next->pos) ||
00197         (best_point && same_point (best_point->pos, vertical_point->pos)) ||
00198       is_exterior_point (critical_point, vertical_point))) {
00199         *best_dist = this_distance;
00200         best_point = vertical_point;
00201         if (chop_vertical_creep)
00202           found_better = TRUE;
00203       }
00204     }
00205     vertical_point = vertical_point->next;
00206   }
00207   while (found_better == TRUE);
00208 
00209   return (best_point);
00210 }
00211 
00212 
00220 void Wordrec::prioritize_points(TESSLINE *outline, POINT_GROUP points) {
00221   EDGEPT *this_point;
00222   EDGEPT *local_min = NULL;
00223   EDGEPT *local_max = NULL;
00224 
00225   this_point = outline->loop;
00226   local_min = this_point;
00227   local_max = this_point;
00228   do {
00229     if (this_point->vec.y < 0) {
00230                                  /* Look for minima */
00231       if (local_max != NULL)
00232         new_max_point(local_max, points);
00233       else if (is_inside_angle (this_point))
00234         add_point_to_list(points, this_point);
00235       local_max = NULL;
00236       local_min = this_point->next;
00237     }
00238     else if (this_point->vec.y > 0) {
00239                                  /* Look for maxima */
00240       if (local_min != NULL)
00241         new_min_point(local_min, points);
00242       else if (is_inside_angle (this_point))
00243         add_point_to_list(points, this_point);
00244       local_min = NULL;
00245       local_max = this_point->next;
00246     }
00247     else {
00248       /* Flat area */
00249       if (local_max != NULL) {
00250         if (local_max->prev->vec.y != 0) {
00251           new_max_point(local_max, points);
00252         }
00253         local_max = this_point->next;
00254         local_min = NULL;
00255       }
00256       else {
00257         if (local_min->prev->vec.y != 0) {
00258           new_min_point(local_min, points);
00259         }
00260         local_min = this_point->next;
00261         local_max = NULL;
00262       }
00263     }
00264 
00265                                  /* Next point */
00266     this_point = this_point->next;
00267   }
00268   while (this_point != outline->loop);
00269 }
00270 
00271 
00279 void Wordrec::new_min_point(EDGEPT *local_min, POINT_GROUP points) {
00280   inT16 dir;
00281 
00282   dir = direction (local_min);
00283 
00284   if (dir < 0) {
00285     add_point_to_list(points, local_min);
00286     return;
00287   }
00288 
00289   if (dir == 0 && point_priority (local_min) < 0) {
00290     add_point_to_list(points, local_min);
00291     return;
00292   }
00293 }
00294 
00295 
00303 void Wordrec::new_max_point(EDGEPT *local_max, POINT_GROUP points) {
00304   inT16 dir;
00305 
00306   dir = direction (local_max);
00307 
00308   if (dir > 0) {
00309     add_point_to_list(points, local_max);
00310     return;
00311   }
00312 
00313   if (dir == 0 && point_priority (local_max) < 0) {
00314     add_point_to_list(points, local_max);
00315     return;
00316   }
00317 }
00318 
00319 
00332 void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point,
00333                                         EDGEPT** best_point,
00334                                         EDGEPT_CLIST *new_points) {
00335   EDGEPT *p;                     /* Iterator */
00336   EDGEPT *this_edgept;           /* Iterator */
00337   EDGEPT_C_IT new_point_it(new_points);
00338   int x = split_point->pos.x;    /* X value of vertical */
00339   int best_dist = LARGE_DISTANCE;/* Best point found */
00340 
00341   if (*best_point != NULL)
00342     best_dist = edgept_dist(split_point, *best_point);
00343 
00344   p = target_point;
00345   /* Look at each edge point */
00346   do {
00347     if ((((p->pos.x <= x) && (x <= p->next->pos.x)) ||
00348       ((p->next->pos.x <= x) && (x <= p->pos.x))) &&
00349       !same_point (split_point->pos, p->pos) &&
00350       !same_point (split_point->pos, p->next->pos)
00351     && (*best_point == NULL || !same_point ((*best_point)->pos, p->pos))) {
00352 
00353       if (near_point(split_point, p, p->next, &this_edgept)) {
00354         new_point_it.add_before_then_move(this_edgept);
00355       }
00356 
00357       if (*best_point == NULL)
00358         best_dist = edgept_dist (split_point, this_edgept);
00359 
00360       this_edgept =
00361         pick_close_point(split_point, this_edgept, &best_dist);
00362       if (this_edgept)
00363         *best_point = this_edgept;
00364     }
00365 
00366     p = p->next;
00367   }
00368   while (p != target_point);
00369 }
00370 
00371 }  // namespace tesseract