Tesseract
3.02
|
Go to the source code of this file.
Classes | |
struct | INT_FX_RESULT_STRUCT |
Namespaces | |
namespace | tesseract |
Functions | |
void | InitIntegerFX () |
FCOORD | FeatureDirection (uinT8 theta) |
tesseract::TrainingSample * | GetIntFeatures (tesseract::NormalizationMode mode, TBLOB *blob, const DENORM &denorm) |
int | ExtractIntFeat (TBLOB *Blob, const DENORM &denorm, INT_FEATURE_ARRAY BLFeat, INT_FEATURE_ARRAY CNFeat, INT_FX_RESULT_STRUCT *Results, inT32 *FeatureOutlineArray=0) |
uinT8 | BinaryAnglePlusPi (inT32 Y, inT32 X) |
int | SaveFeature (INT_FEATURE_ARRAY FeatureArray, uinT16 FeatureNum, inT16 X, inT16 Y, uinT8 Theta) |
uinT16 | MySqrt (inT32 X, inT32 Y) |
uinT8 | MySqrt2 (uinT16 N, uinT32 I, uinT8 *Exp) |
void | ClipRadius (uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) |
uinT8 BinaryAnglePlusPi (inT32 Y, inT32 X)
Definition at line 429 of file intfx.cpp.
{
  // Body of BinaryAnglePlusPi: converts the direction of the vector (X, Y)
  // into an 8-bit binary angle (128 units == PI) and returns that angle
  // rotated by PI.  The vector must not be (0, 0).
  assert((X != 0) || (Y != 0));

  // Component magnitudes.
  const uinT32 mag_x = (X < 0) ? -X : X;
  const uinT32 mag_y = (Y < 0) ? -Y : Y;
  const bool x_dominates = mag_x > mag_y;

  // Index the arctangent table with (smaller / larger), scaled to the table
  // size and clamped to the last entry.
  uinT16 table_index;
  if (x_dominates) {
    table_index = mag_y * ATAN_TABLE_SIZE / mag_x;
  } else {
    table_index = mag_x * ATAN_TABLE_SIZE / mag_y;
  }
  if (table_index >= ATAN_TABLE_SIZE) {
    table_index = ATAN_TABLE_SIZE - 1;
  }
  const inT16 octant_angle = AtanTable[table_index];

  // Unfold the table value into the full circle according to the signs of
  // X and Y and which component dominates.
  inT16 angle;
  if (X >= 0 && Y >= 0) {
    angle = x_dominates ? octant_angle : 64 - octant_angle;
  } else if (X >= 0) {          // Y < 0
    angle = x_dominates ? 256 - octant_angle : 192 + octant_angle;
  } else if (Y >= 0) {          // X < 0
    angle = x_dominates ? 128 - octant_angle : 64 + octant_angle;
  } else {                      // X < 0 and Y < 0
    angle = x_dominates ? 128 + octant_angle : 192 - octant_angle;
  }

  /* reverse angles to match old feature extractor:   Angle += PI */
  angle += 128;
  angle &= 255;
  return static_cast<uinT8>(angle);
}
void ClipRadius (uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp)
Definition at line 594 of file intfx.cpp.
{
  // Body of ClipRadius: adjusts the (mantissa, exponent) pairs
  // (*RxInv, *RxExp) and (*RyInv, *RyExp) in place against the
  // classify_radius_gyr_min_* and classify_radius_gyr_max_* limits.
  //
  // The same comparison loop is written out four times.  Each copy walks two
  // (mantissa, exponent) pairs A = (AM, AE) and B = (BM, BE) one bit at a
  // time from the least-significant end, adding A's bit, the inverse of B's
  // bit and the running carry (which starts at 1).  Only LastCarry survives
  // the loop; the BitN value computed inside the loop is overwritten by the
  // three statements that follow it.
  // NOTE(review): this looks like a bit-serial A + ~B + 1 (i.e. A - B), in
  // which case BitN == 1 after the loop would mean A < B -- confirm.
  register uinT8 AM, BM, AE, BE;
  register uinT8 BitN, LastCarry;
  int RxInvLarge, RyInvSmall;

  // Comparison 1: A = minimum limit, B = *RxInv.
  AM = classify_radius_gyr_min_man;
  AE = classify_radius_gyr_min_exp;
  BM = *RxInv;
  BE = *RxExp;
  LastCarry = 1;
  while ((AM != 0) || (BM != 0)) {
    if (AE > BE) {
      // Only A is consumed; a constant 1 stands in for B's inverted bit.
      BitN = LastCarry + (AM & 1) + 1;
      AM >>= 1;
      AE--;
    }
    else if (AE < BE) {
      // Only B is consumed; A contributes nothing.
      BitN = LastCarry + (!(BM & 1));
      BM >>= 1;
      BE--;
    }
    else {                       /* AE == BE */
      BitN = LastCarry + (AM & 1) + (!(BM & 1));
      AM >>= 1;
      BM >>= 1;
      AE--;
      BE--;
    }
    LastCarry = (BitN & 2) > 1;
    BitN = BitN & 1;
  }
  // Final step: fold the carry into one more bit and test it.
  BitN = LastCarry + 1;
  LastCarry = (BitN & 2) > 1;
  BitN = BitN & 1;

  if (BitN == 1) {
    // Replace the X value with the minimum limit.
    *RxInv = classify_radius_gyr_min_man;
    *RxExp = classify_radius_gyr_min_exp;
  }

  // Comparison 2: A = minimum limit, B = *RyInv.
  AM = classify_radius_gyr_min_man;
  AE = classify_radius_gyr_min_exp;
  BM = *RyInv;
  BE = *RyExp;
  LastCarry = 1;
  while ((AM != 0) || (BM != 0)) {
    if (AE > BE) {
      BitN = LastCarry + (AM & 1) + 1;
      AM >>= 1;
      AE--;
    }
    else if (AE < BE) {
      BitN = LastCarry + (!(BM & 1));
      BM >>= 1;
      BE--;
    }
    else {                       /* AE == BE */
      BitN = LastCarry + (AM & 1) + (!(BM & 1));
      AM >>= 1;
      BM >>= 1;
      AE--;
      BE--;
    }
    LastCarry = (BitN & 2) > 1;
    BitN = BitN & 1;
  }
  BitN = LastCarry + 1;
  LastCarry = (BitN & 2) > 1;
  BitN = BitN & 1;

  if (BitN == 1) {
    // Replace the Y value with the minimum limit.
    *RyInv = classify_radius_gyr_min_man;
    *RyExp = classify_radius_gyr_min_exp;
  }

  // Comparison 3: A = maximum limit, B = *RxInv (possibly just replaced
  // above).  The outcome is only recorded in RxInvLarge.
  AM = classify_radius_gyr_max_man;
  AE = classify_radius_gyr_max_exp;
  BM = *RxInv;
  BE = *RxExp;
  LastCarry = 1;
  while ((AM != 0) || (BM != 0)) {
    if (AE > BE) {
      BitN = LastCarry + (AM & 1) + 1;
      AM >>= 1;
      AE--;
    }
    else if (AE < BE) {
      BitN = LastCarry + (!(BM & 1));
      BM >>= 1;
      BE--;
    }
    else {                       /* AE == BE */
      BitN = LastCarry + (AM & 1) + (!(BM & 1));
      AM >>= 1;
      BM >>= 1;
      AE--;
      BE--;
    }
    LastCarry = (BitN & 2) > 1;
    BitN = BitN & 1;
  }
  BitN = LastCarry + 1;
  LastCarry = (BitN & 2) > 1;
  BitN = BitN & 1;

  if (BitN == 1)
    RxInvLarge = 1;
  else
    RxInvLarge = 0;

  // Comparison 4: operands swapped relative to the others -- A = *RyInv,
  // B = maximum limit.  The outcome is only recorded in RyInvSmall.
  AM = *RyInv;
  AE = *RyExp;
  BM = classify_radius_gyr_max_man;
  BE = classify_radius_gyr_max_exp;
  LastCarry = 1;
  while ((AM != 0) || (BM != 0)) {
    if (AE > BE) {
      BitN = LastCarry + (AM & 1) + 1;
      AM >>= 1;
      AE--;
    }
    else if (AE < BE) {
      BitN = LastCarry + (!(BM & 1));
      BM >>= 1;
      BE--;
    }
    else {                       /* AE == BE */
      BitN = LastCarry + (AM & 1) + (!(BM & 1));
      AM >>= 1;
      BM >>= 1;
      AE--;
      BE--;
    }
    LastCarry = (BitN & 2) > 1;
    BitN = BitN & 1;
  }
  BitN = LastCarry + 1;
  LastCarry = (BitN & 2) > 1;
  BitN = BitN & 1;

  if (BitN == 1)
    RyInvSmall = 1;
  else
    RyInvSmall = 0;

  // Only when both flags are set is the Y value replaced by the maximum
  // limit; the X value is never replaced by the maximum.
  if (RxInvLarge && RyInvSmall) {
    *RyInv = classify_radius_gyr_max_man;
    *RyExp = classify_radius_gyr_max_exp;
  }
}
int ExtractIntFeat | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
INT_FEATURE_ARRAY | BLFeat, | ||
INT_FEATURE_ARRAY | CNFeat, | ||
INT_FX_RESULT_STRUCT * | Results, | ||
inT32 * | FeatureOutlineArray = 0 |
||
) |
Definition at line 143 of file intfx.cpp.
{
  // Body of ExtractIntFeat: walks the outlines of Blob three times to
  //   1. compute the length-weighted centroid (Xmean, Ymean),
  //   2. emit baseline-normalized features into BLFeat and accumulate the
  //      second moments Ix, Iy,
  //   3. emit character-normalized features into CNFeat.
  //
  // Results is reset on entry and filled in as each stage completes.
  // If FeatureOutlineArray is non-NULL, entry k receives the index of the
  // outline that produced character-normalized feature k.
  // denorm is not referenced in this body.
  //
  // Returns TRUE on success.  Returns FALSE if an outline has a missing or
  // degenerate loop, if the total usable outline length is zero, or if
  // SaveFeature rejects a feature.
  TESSLINE *OutLine;
  EDGEPT *Loop, *LoopStart, *Segment;
  inT16 LastX, LastY, Xmean, Ymean;
  inT32 NormX, NormY, DeltaX, DeltaY;
  inT32 Xsum, Ysum;
  uinT32 Ix, Iy, LengthSum;
  uinT16 n;
  // n - the number of features to extract from a given outline segment.
  // We extract features from every outline segment longer than ~6 units.
  // We chop these long segments into standard-sized features approximately
  // 13 (= 64 / 5) units in length.
  uinT8 Theta;
  uinT16 NumBLFeatures, NumCNFeatures;
  uinT8 RxInv, RyInv;            /* x.xxxxxxx  *  2^Exp  */
  uinT8 RxExp, RyExp;
                                 /* sxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxx */
  inT32 pfX, pfY, dX, dY;
  uinT16 Length;
  int i;

  Results->Length = 0;
  Results->Xmean = 0;
  Results->Ymean = 0;
  Results->Rx = 0;
  Results->Ry = 0;
  Results->NumBL = 0;
  Results->NumCN = 0;
  Results->YBottom = MAX_UINT8;
  Results->YTop = 0;

  // Calculate the centroid (Xmean, Ymean) for the blob.
  //   We use centroid (instead of center of bounding box or center of smallest
  //   enclosing circle) so the algorithm will not be too greatly influenced by
  //   small amounts of information at the edge of a character's bounding box.
  NumBLFeatures = 0;
  NumCNFeatures = 0;
  OutLine = Blob->outlines;
  Xsum = 0;
  Ysum = 0;
  LengthSum = 0;
  while (OutLine != NULL) {
    LoopStart = OutLine->loop;
    Loop = LoopStart;
    /* Check for bad loops -- must happen before Loop is dereferenced. */
    if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart))
      return FALSE;
    LastX = Loop->pos.x;
    LastY = Loop->pos.y;
    do {
      Segment = Loop;
      Loop = Loop->next;
      NormX = Loop->pos.x;
      NormY = Loop->pos.y;

      n = 1;
      if (!Segment->IsHidden()) {
        DeltaX = NormX - LastX;
        DeltaY = NormY - LastY;
        Length = MySqrt(DeltaX, DeltaY);
        n = ((Length << 2) + Length + 32) >> 6;
        if (n != 0) {
          // (2 * LastX + DeltaX) is twice the segment midpoint; the factor
          // of two is removed by the >> 1 when the means are computed.
          Xsum += ((LastX << 1) + DeltaX) * (int) Length;
          Ysum += ((LastY << 1) + DeltaY) * (int) Length;
          LengthSum += Length;
        }
      }
      if (n != 0) {              /* Throw away a point that is too close */
        LastX = NormX;
        LastY = NormY;
      }
    }
    while (Loop != LoopStart);
    OutLine = OutLine->next;
  }
  if (LengthSum == 0)
    return FALSE;
  Xmean = (Xsum / (inT32) LengthSum) >> 1;
  Ymean = (Ysum / (inT32) LengthSum) >> 1;

  Results->Length = LengthSum;
  Results->Xmean = Xmean;
  Results->Ymean = Ymean;

  // Extract Baseline normalized features,
  // and find 2nd moments (Ix, Iy) & radius of gyration (Rx, Ry).
  //
  //   Ix = Sum y^2 dA, where:
  //        Ix: the second moment of area about the axis x
  //        dA = 1 for our standard-sized piece of outline
  //         y: the perpendicular distance to the x axis
  //   Rx = sqrt(Ix / A)
  //      Note: 1 <= Rx <= height of blob / 2
  //   Ry = sqrt(Iy / A)
  //      Note: 1 <= Ry <=  width of blob / 2
  Ix = 0;
  Iy = 0;
  NumBLFeatures = 0;
  OutLine = Blob->outlines;
  int min_x = 0;
  int max_x = 0;
  while (OutLine != NULL) {
    LoopStart = OutLine->loop;
    Loop = LoopStart;
    /* Check for bad loops -- must happen before Loop is dereferenced. */
    if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart))
      return FALSE;
    LastX = Loop->pos.x - Xmean;
    LastY = Loop->pos.y;
    do {
      Segment = Loop;
      Loop = Loop->next;
      NormX = Loop->pos.x - Xmean;
      NormY = Loop->pos.y;
      if (NormY < Results->YBottom)
        Results->YBottom = ClipToRange(NormY, 0, MAX_UINT8);
      if (NormY > Results->YTop)
        Results->YTop = ClipToRange(NormY, 0, MAX_UINT8);
      UpdateRange(NormX, &min_x, &max_x);

      n = 1;
      if (!Segment->IsHidden()) {
        DeltaX = NormX - LastX;
        DeltaY = NormY - LastY;
        Length = MySqrt(DeltaX, DeltaY);
        n = ((Length << 2) + Length + 32) >> 6;
        if (n != 0) {
          Theta = BinaryAnglePlusPi(DeltaY, DeltaX);
          // 24.8 fixed point: step (dX, dY) per feature, first feature
          // placed half a step along the segment.
          dX = (DeltaX << 8) / n;
          dY = (DeltaY << 8) / n;
          pfX = (LastX << 8) + (dX >> 1);
          pfY = (LastY << 8) + (dY >> 1);
          Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean);
          // TODO(eger): Hmmm... Xmean is not necessarily 0.
          //   Figure out if we should center against Xmean for these
          //   features, and if so fix Iy & SaveFeature().
          Iy += (pfX >> 8) * (pfX >> 8);
          if (SaveFeature(BLFeat,
                          NumBLFeatures,
                          (inT16) (pfX >> 8),
                          (inT16) ((pfY >> 8) - 128),
                          Theta) == FALSE)
            return FALSE;
          NumBLFeatures++;
          for (i = 1; i < n; i++) {
            pfX += dX;
            pfY += dY;
            Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean);
            Iy += (pfX >> 8) * (pfX >> 8);
            if (SaveFeature(BLFeat,
                            NumBLFeatures,
                            (inT16) (pfX >> 8),
                            (inT16) ((pfY >> 8) - 128),
                            Theta) == FALSE)
              return FALSE;
            NumBLFeatures++;
          }
        }
      }
      if (n != 0) {              /* Throw away a point that is too close */
        LastX = NormX;
        LastY = NormY;
      }
    }
    while (Loop != LoopStart);
    OutLine = OutLine->next;
  }
  Results->Width = max_x - min_x;
  if (Ix == 0)
    Ix = 1;
  if (Iy == 0)
    Iy = 1;
  RxInv = MySqrt2 (NumBLFeatures, Ix, &RxExp);
  RyInv = MySqrt2 (NumBLFeatures, Iy, &RyExp);
  ClipRadius(&RxInv, &RxExp, &RyInv, &RyExp);

  Results->Rx = (inT16) (51.2 / (double) RxInv * pow (2.0, (double) RxExp));
  Results->Ry = (inT16) (51.2 / (double) RyInv * pow (2.0, (double) RyExp));
  if (Results->Ry == 0) {
    /*
        This would result in features having 'nan' values.
        Since the expression is always > 0, assign a value of 1.
    */
    Results->Ry = 1;
  }
  Results->NumBL = NumBLFeatures;

  // Extract character normalized features
  //
  //   Rescale the co-ordinates to "equalize" distribution in X and Y, making
  //   all of the following unichars be sized to look similar:  , ' 1 i
  //
  //   We calculate co-ordinates relative to the centroid, and then scale them
  //   as follows (accomplishing a scale of up to 102.4 / dimension):
  //     y *= 51.2 / Rx    [ y scaled by 0.0 ... 102.4 / height of glyph  ]
  //     x *= 51.2 / Ry    [ x scaled by 0.0 ... 102.4 / width of glyph ]
  //   Although tempting to think so, this does not guarantee that our range
  //   is within [-102.4...102.4] x [-102.4...102.4] because (Xmean, Ymean)
  //   is the centroid, not the center of the bounding box.  Instead, we can
  //   only bound the result to [-204 ... 204] x [-204 ... 204]
  //
  NumCNFeatures = 0;
  OutLine = Blob->outlines;
  int OutLineIndex = -1;
  while (OutLine != NULL) {
    LoopStart = OutLine->loop;
    Loop = LoopStart;
    OutLineIndex++;
    /* Check for bad loops -- must happen before Loop is dereferenced. */
    if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart))
      return FALSE;
    // Scale the start point in 32 bits, exactly as every later point is
    // scaled below.  Multiplying straight into the 16-bit LastX/LastY would
    // truncate the product before the exponent shift brings it into range.
    NormX = (Loop->pos.x - Xmean) * RyInv;
    NormY = (Loop->pos.y - Ymean) * RxInv;
    NormX >>= (inT8) RyExp;
    NormY >>= (inT8) RxExp;
    LastX = NormX;
    LastY = NormY;
    do {
      Segment = Loop;
      Loop = Loop->next;
      NormX = (Loop->pos.x - Xmean) * RyInv;
      NormY = (Loop->pos.y - Ymean) * RxInv;
      NormX >>= (inT8) RyExp;
      NormY >>= (inT8) RxExp;

      n = 1;
      if (!Segment->IsHidden()) {
        DeltaX = NormX - LastX;
        DeltaY = NormY - LastY;
        Length = MySqrt(DeltaX, DeltaY);
        n = ((Length << 2) + Length + 32) >> 6;
        if (n != 0) {
          Theta = BinaryAnglePlusPi(DeltaY, DeltaX);
          dX = (DeltaX << 8) / n;
          dY = (DeltaY << 8) / n;
          pfX = (LastX << 8) + (dX >> 1);
          pfY = (LastY << 8) + (dY >> 1);
          if (SaveFeature(CNFeat,
                          NumCNFeatures,
                          (inT16) (pfX >> 8),
                          (inT16) (pfY >> 8),
                          Theta) == FALSE)
            return FALSE;
          if (FeatureOutlineArray) {
            FeatureOutlineArray[NumCNFeatures] = OutLineIndex;
          }
          NumCNFeatures++;
          for (i = 1; i < n; i++) {
            pfX += dX;
            pfY += dY;
            if (SaveFeature(CNFeat,
                            NumCNFeatures,
                            (inT16) (pfX >> 8),
                            (inT16) (pfY >> 8),
                            Theta) == FALSE)
              return FALSE;
            if (FeatureOutlineArray) {
              FeatureOutlineArray[NumCNFeatures] = OutLineIndex;
            }
            NumCNFeatures++;
          }
        }
      }
      if (n != 0) {              /* Throw away a point that is too close */
        LastX = NormX;
        LastY = NormY;
      }
    }
    while (Loop != LoopStart);
    OutLine = OutLine->next;
  }

  Results->NumCN = NumCNFeatures;
  return TRUE;
}
tesseract::TrainingSample* GetIntFeatures | ( | tesseract::NormalizationMode | mode, |
TBLOB * | blob, | ||
const DENORM & | denorm | ||
) |
Definition at line 97 of file intfx.cpp.
{
  // Body of GetIntFeatures: runs the integer feature extractor on blob and
  // wraps the feature set selected by mode in a TrainingSample.
  // Returns NULL when the selected feature set is empty.  The return value
  // of ExtractIntFeat is not inspected; only the counts it stores in the
  // result struct are used.
  INT_FEATURE_ARRAY baseline_features;
  INT_FEATURE_ARRAY char_norm_features;
  INT_FX_RESULT_STRUCT fx_result;
  ExtractIntFeat(blob, denorm, baseline_features, char_norm_features,
                 &fx_result, NULL);

  if (mode == tesseract::NM_CHAR_ANISOTROPIC) {
    // Character-normalized features.
    const int count = fx_result.NumCN;
    if (count <= 0)
      return NULL;
    return TrainingSample::CopyFromFeatures(fx_result, char_norm_features,
                                            count);
  }

  if (mode == tesseract::NM_BASELINE) {
    // Baseline-normalized features.
    const int count = fx_result.NumBL;
    if (count <= 0)
      return NULL;
    return TrainingSample::CopyFromFeatures(fx_result, baseline_features,
                                            count);
  }

  // Any other mode is a programming error.
  ASSERT_HOST(!"Unsupported normalization mode!");
  return NULL;
}
void InitIntegerFX | ( | ) |
---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------
---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------
Definition at line 74 of file intfx.cpp.
{
  // Body of InitIntegerFX: fills AtanTable, cos_table and sin_table the
  // first time it is called; later calls return without touching them.
  // The flag check and the table fill both run while atan_table_mutex is
  // held.
  static bool tables_ready = false;

  atan_table_mutex.Lock();
  if (tables_ready) {
    atan_table_mutex.Unlock();
    return;
  }

  // Arctangent of (entry / ATAN_TABLE_SIZE), scaled so PI maps to 128 and
  // rounded to the nearest integer.
  for (int entry = 0; entry < ATAN_TABLE_SIZE; ++entry) {
    AtanTable[entry] =
        (uinT8) (atan ((entry / (float) ATAN_TABLE_SIZE)) * 128.0 / PI + 0.5);
  }

  // Cosine and sine of INT_CHAR_NORM_RANGE evenly spaced angles over a full
  // turn, each offset by PI.
  for (int step = 0; step < INT_CHAR_NORM_RANGE; ++step) {
    cos_table[step] = cos(step * 2 * PI / INT_CHAR_NORM_RANGE + PI);
    sin_table[step] = sin(step * 2 * PI / INT_CHAR_NORM_RANGE + PI);
  }

  tables_ready = true;
  atan_table_mutex.Unlock();
}
uinT16 MySqrt (inT32 X, inT32 Y)
Definition at line 505 of file intfx.cpp.
{
  // Body of MySqrt: integer square root of X*X + Y*Y, built one bit at a
  // time from the most significant candidate bit downwards.  |X| and |Y|
  // are first clamped to (1 << IntegerMatcher::kIntEvidenceTruncBits) - 1.
  const uinT32 max_component =
      ((1 << IntegerMatcher::kIntEvidenceTruncBits) - 1);

  if (X < 0)
    X = -X;
  if (Y < 0)
    Y = -Y;
  if (X > max_component)
    X = max_component;
  if (Y > max_component)
    Y = max_component;

  const uinT32 sum_of_squares = X * X + Y * Y;

  // Try each bit from high to low; keep it if the square of the candidate
  // root still fits under the target.
  uinT16 root = 0;
  for (uinT16 bit = (max_component + 1) << 1; bit != 0; bit >>= 1) {
    const uinT32 candidate_sq = (root | bit) * (root | bit);
    if (candidate_sq <= sum_of_squares)
      root |= bit;
  }
  return root;
}
uinT8 MySqrt2 (uinT16 N, uinT32 I, uinT8 *Exp)
Definition at line 542 of file intfx.cpp.
{
  // Body of MySqrt2: returns an 8-bit square root of a normalized
  // (N * 41943) / I ratio and stores the matching power-of-two exponent in
  // *Exp.  If the exponent would be negative, returns 255 with *Exp = 0.
  // NOTE(review): the derivation of the constants 41943 and 9 is not
  // visible here -- confirm against the caller's 51.2 scaling.
  // NOTE(review): the normalization loops below do not terminate if N or I
  // is zero; the caller is presumably expected to guarantee both non-zero.
  uinT32 scaled_n = N * 41943;
  inT8 exponent = 9;

  // Shift each operand left two bits at a time until one of its top two
  // bits is set, tracking the net shift in the exponent.
  while ((scaled_n & 0xc0000000) == 0) {
    scaled_n <<= 2;
    exponent += 1;
  }
  while ((I & 0xc0000000) == 0) {
    I <<= 2;
    exponent -= 1;
  }

  // If neither has its top bit set, one more shared shift costs nothing in
  // the ratio and gains a bit of precision.
  if (((scaled_n & 0x80000000) == 0) && ((I & 0x80000000) == 0)) {
    scaled_n <<= 1;
    I <<= 1;
  }

  scaled_n &= 0xffff0000;
  I >>= 14;
  const uinT16 ratio = scaled_n / I;

  // Bit-by-bit integer square root of the ratio.
  uinT8 root = 0;
  for (uinT8 bit = 128; bit != 0; bit >>= 1) {
    const uinT16 candidate_sq = (root | bit) * (root | bit);
    if (candidate_sq <= ratio)
      root |= bit;
  }

  if (exponent < 0) {
    *Exp = 0;
    return 255;
  }
  *Exp = exponent;
  return root;
}
int SaveFeature | ( | INT_FEATURE_ARRAY | FeatureArray, |
uinT16 | FeatureNum, | ||
inT16 | X, | ||
inT16 | Y, | ||
uinT8 | Theta | ||
) |
Definition at line 478 of file intfx.cpp.
{
  // Body of SaveFeature: stores one feature at FeatureArray[FeatureNum].
  // X and Y are offset by 128 and clipped to [0, 255]; Theta is stored
  // unchanged and CP_misses is reset to 0.
  // Returns FALSE, writing nothing, when FeatureNum is not below
  // MAX_NUM_INT_FEATURES; returns TRUE otherwise.
  if (FeatureNum >= MAX_NUM_INT_FEATURES)
    return FALSE;

  INT_FEATURE Feature = &(FeatureArray[FeatureNum]);

  // Offset and clip in int.  Adding 128 back into an inT16 would wrap for
  // inputs above 32639 and clip them to 0 instead of 255.
  Feature->X = ClipToRange<int>(X + 128, 0, 255);
  Feature->Y = ClipToRange<int>(Y + 128, 0, 255);
  Feature->Theta = Theta;
  Feature->CP_misses = 0;
  return TRUE;
}