|
Tesseract
3.02
|
Go to the source code of this file.
Classes | |
| struct | INT_FX_RESULT_STRUCT |
Namespaces | |
| namespace | tesseract |
Functions | |
| void | InitIntegerFX () |
| FCOORD | FeatureDirection (uinT8 theta) |
| tesseract::TrainingSample * | GetIntFeatures (tesseract::NormalizationMode mode, TBLOB *blob, const DENORM &denorm) |
| int | ExtractIntFeat (TBLOB *Blob, const DENORM &denorm, INT_FEATURE_ARRAY BLFeat, INT_FEATURE_ARRAY CNFeat, INT_FX_RESULT_STRUCT *Results, inT32 *FeatureOutlineArray=0) |
| uinT8 | BinaryAnglePlusPi (inT32 Y, inT32 X) |
| int | SaveFeature (INT_FEATURE_ARRAY FeatureArray, uinT16 FeatureNum, inT16 X, inT16 Y, uinT8 Theta) |
| uinT16 | MySqrt (inT32 X, inT32 Y) |
| uinT8 | MySqrt2 (uinT16 N, uinT32 I, uinT8 *Exp) |
| void | ClipRadius (uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) |
Definition at line 429 of file intfx.cpp.
{
  // Converts the direction (X, Y) into a binary angle in which 256 units
  // make a full turn, then adds half a turn (128 units) before returning it.
  // The vector must not be (0, 0).
  assert((X != 0) || (Y != 0));

  // Magnitudes of the two components.
  const uinT32 AbsX = (X < 0) ? -X : X;
  const uinT32 AbsY = (Y < 0) ? -Y : Y;
  const bool XDominant = AbsX > AbsY;

  // Table index: smaller magnitude over larger magnitude, scaled by
  // ATAN_TABLE_SIZE and clamped to the last table entry (reached when the
  // two magnitudes are equal).
  uinT16 Ratio;
  if (XDominant) {
    Ratio = AbsY * ATAN_TABLE_SIZE / AbsX;
  } else {
    Ratio = AbsX * ATAN_TABLE_SIZE / AbsY;
  }
  if (Ratio >= ATAN_TABLE_SIZE) {
    Ratio = ATAN_TABLE_SIZE - 1;
  }
  const inT16 Atan = AtanTable[Ratio];

  // Place the table value in the right octant: pick the quadrant from the
  // signs of X and Y, then the half of that quadrant from XDominant.
  inT16 Angle;
  if (X >= 0) {
    if (Y >= 0) {
      Angle = XDominant ? Atan : 64 - Atan;
    } else {
      Angle = XDominant ? 256 - Atan : 192 + Atan;
    }
  } else {
    if (Y >= 0) {
      Angle = XDominant ? 128 - Atan : 64 + Atan;
    } else {
      Angle = XDominant ? 128 + Atan : 192 - Atan;
    }
  }

  /* reverse angles to match old feature extractor:   Angle += PI */
  return static_cast<uinT8>((Angle + 128) & 255);
}
Definition at line 594 of file intfx.cpp.
{
  // Adjusts the mantissa/exponent pairs (*RxInv, *RxExp) and
  // (*RyInv, *RyExp) against the classify_radius_gyr_min_* and
  // classify_radius_gyr_max_* parameters.  The same bit-serial test is
  // applied four times, so it lives in one private helper.
  struct BitSerial {
    // Walks both mantissas from the least-significant bit upward, summing
    // A's bit, the complement of B's bit and the running carry (seeded
    // with 1).  While the exponents differ, only the operand with the
    // larger exponent is shifted and the other operand's bit is taken as 0.
    // Returns true when the carry left over once both mantissas are
    // exhausted is 0.
    // NOTE(review): this reads like a two's-complement A - B, i.e. an
    // "A < B" test on mantissa/exponent values -- confirm before relying
    // on that interpretation.
    static bool NoCarryOut(uinT8 AM, uinT8 AE, uinT8 BM, uinT8 BE) {
      uinT8 LastCarry = 1;
      while ((AM != 0) || (BM != 0)) {
        uinT8 BitN;
        if (AE > BE) {
          BitN = LastCarry + (AM & 1) + 1;
          AM >>= 1;
          AE--;
        } else if (AE < BE) {
          BitN = LastCarry + (!(BM & 1));
          BM >>= 1;
          BE--;
        } else {                 /* AE == BE */
          BitN = LastCarry + (AM & 1) + (!(BM & 1));
          AM >>= 1;
          BM >>= 1;
          AE--;
          BE--;
        }
        // Only the carry out of this bit position is needed afterwards.
        LastCarry = (BitN & 2) > 1;
      }
      return LastCarry == 0;
    }
  };

  // Test the "min" parameters against Rx; on success Rx takes their value.
  if (BitSerial::NoCarryOut(classify_radius_gyr_min_man,
                            classify_radius_gyr_min_exp,
                            *RxInv, *RxExp)) {
    *RxInv = classify_radius_gyr_min_man;
    *RxExp = classify_radius_gyr_min_exp;
  }

  // Same test and replacement for Ry.
  if (BitSerial::NoCarryOut(classify_radius_gyr_min_man,
                            classify_radius_gyr_min_exp,
                            *RyInv, *RyExp)) {
    *RyInv = classify_radius_gyr_min_man;
    *RyExp = classify_radius_gyr_min_exp;
  }

  // Test the "max" parameters against the (possibly replaced) Rx ...
  const bool RxInvLarge =
      BitSerial::NoCarryOut(classify_radius_gyr_max_man,
                            classify_radius_gyr_max_exp,
                            *RxInv, *RxExp);

  // ... and, with the operands swapped, Ry against the "max" parameters.
  const bool RyInvSmall =
      BitSerial::NoCarryOut(*RyInv, *RyExp,
                            classify_radius_gyr_max_man,
                            classify_radius_gyr_max_exp);

  // Only when both tests succeed is Ry replaced by the "max" parameters.
  if (RxInvLarge && RyInvSmall) {
    *RyInv = classify_radius_gyr_max_man;
    *RyExp = classify_radius_gyr_max_exp;
  }
}
| int ExtractIntFeat | ( | TBLOB * | Blob, |
| const DENORM & | denorm, | ||
| INT_FEATURE_ARRAY | BLFeat, | ||
| INT_FEATURE_ARRAY | CNFeat, | ||
| INT_FX_RESULT_STRUCT * | Results, | ||
| inT32 * | FeatureOutlineArray = 0 |
||
| ) |
Definition at line 143 of file intfx.cpp.
{
  // Extracts integer features from the outlines of Blob in three passes:
  //   1. compute the length-weighted centroid (Xmean, Ymean);
  //   2. emit baseline-normalized features into BLFeat and accumulate the
  //      second moments Ix, Iy;
  //   3. emit character-normalized features into CNFeat.
  // Results receives Length, Xmean, Ymean, YBottom, YTop, Width, Rx, Ry,
  // NumBL and NumCN.  When FeatureOutlineArray is non-NULL, its entry for
  // each CN feature is set to the 0-based index of the outline the feature
  // came from.  The denorm argument is not used by this body.
  // Returns FALSE when an outline loop is malformed, when the total segment
  // length is zero, or when SaveFeature() rejects a feature; TRUE otherwise.
  // On a FALSE return Results may be only partially filled in.
  TESSLINE *OutLine;
  EDGEPT *Loop, *LoopStart, *Segment;
  inT16 LastX, LastY, Xmean, Ymean;
  inT32 NormX, NormY, DeltaX, DeltaY;
  inT32 Xsum, Ysum;
  uinT32 Ix, Iy, LengthSum;
  uinT16 n;
  // n - the number of features to extract from a given outline segment.
  // We extract features from every outline segment longer than ~6 units.
  // We chop these long segments into standard-sized features approximately
  // 13 (= 64 / 5) units in length.
  uinT8 Theta;
  uinT16 NumBLFeatures, NumCNFeatures;
  uinT8 RxInv, RyInv;            /* x.xxxxxxx  *  2^Exp  */
  uinT8 RxExp, RyExp;
                                 /* sxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxx */
  inT32 pfX, pfY, dX, dY;
  uinT16 Length;
  int i;

  Results->Length = 0;
  Results->Xmean = 0;
  Results->Ymean = 0;
  Results->Rx = 0;
  Results->Ry = 0;
  Results->NumBL = 0;
  Results->NumCN = 0;
  Results->YBottom = MAX_UINT8;
  Results->YTop = 0;

  // Calculate the centroid (Xmean, Ymean) for the blob.
  //   We use centroid (instead of center of bounding box or center of smallest
  //   enclosing circle) so the algorithm will not be too greatly influenced by
  //   small amounts of information at the edge of a character's bounding box.
  NumBLFeatures = 0;
  NumCNFeatures = 0;
  OutLine = Blob->outlines;
  Xsum = 0;
  Ysum = 0;
  LengthSum = 0;
  while (OutLine != NULL) {
    LoopStart = OutLine->loop;
    Loop = LoopStart;
    /* Check for bad loops -- must happen before Loop is dereferenced */
    if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart))
      return FALSE;
    LastX = Loop->pos.x;
    LastY = Loop->pos.y;
    do {
      Segment = Loop;
      Loop = Loop->next;
      NormX = Loop->pos.x;
      NormY = Loop->pos.y;

      n = 1;
      if (!Segment->IsHidden()) {
        DeltaX = NormX - LastX;
        DeltaY = NormY - LastY;
        Length = MySqrt(DeltaX, DeltaY);
        n = ((Length << 2) + Length + 32) >> 6;
        if (n != 0) {
          Xsum += ((LastX << 1) + DeltaX) * (int) Length;
          Ysum += ((LastY << 1) + DeltaY) * (int) Length;
          LengthSum += Length;
        }
      }
      if (n != 0) {              /* Throw away a point that is too close */
        LastX = NormX;
        LastY = NormY;
      }
    }
    while (Loop != LoopStart);
    OutLine = OutLine->next;
  }
  if (LengthSum == 0)
    return FALSE;
  Xmean = (Xsum / (inT32) LengthSum) >> 1;
  Ymean = (Ysum / (inT32) LengthSum) >> 1;

  Results->Length = LengthSum;
  Results->Xmean = Xmean;
  Results->Ymean = Ymean;

  // Extract Baseline normalized features,
  // and find 2nd moments (Ix, Iy) & radius of gyration (Rx, Ry).
  //
  //   Ix = Sum y^2 dA, where:
  //     Ix: the second moment of area about the axis x
  //     dA = 1 for our standard-sized piece of outline
  //      y: the perpendicular distance to the x axis
  //   Rx = sqrt(Ix / A)
  //     Note: 1 <= Rx <= height of blob / 2
  //   Ry = sqrt(Iy / A)
  //     Note: 1 <= Ry <=  width of blob / 2
  Ix = 0;
  Iy = 0;
  NumBLFeatures = 0;
  OutLine = Blob->outlines;
  int min_x = 0;
  int max_x = 0;
  while (OutLine != NULL) {
    LoopStart = OutLine->loop;
    Loop = LoopStart;
    /* Check for bad loops -- must happen before Loop is dereferenced */
    if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart))
      return FALSE;
    LastX = Loop->pos.x - Xmean;
    LastY = Loop->pos.y;
    do {
      Segment = Loop;
      Loop = Loop->next;
      NormX = Loop->pos.x - Xmean;
      NormY = Loop->pos.y;
      if (NormY < Results->YBottom)
        Results->YBottom = ClipToRange(NormY, 0, MAX_UINT8);
      if (NormY > Results->YTop)
        Results->YTop = ClipToRange(NormY, 0, MAX_UINT8);
      UpdateRange(NormX, &min_x, &max_x);

      n = 1;
      if (!Segment->IsHidden()) {
        DeltaX = NormX - LastX;
        DeltaY = NormY - LastY;
        Length = MySqrt(DeltaX, DeltaY);
        n = ((Length << 2) + Length + 32) >> 6;
        if (n != 0) {
          Theta = BinaryAnglePlusPi(DeltaY, DeltaX);
          // 24.8 fixed point: step (dX, dY) per feature, first feature
          // placed half a step in from the segment start.
          dX = (DeltaX << 8) / n;
          dY = (DeltaY << 8) / n;
          pfX = (LastX << 8) + (dX >> 1);
          pfY = (LastY << 8) + (dY >> 1);
          Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean);
          // TODO(eger): Hmmm... Xmean is not necessarily 0.
          //   Figure out if we should center against Xmean for these
          //   features, and if so fix Iy & SaveFeature().
          Iy += (pfX >> 8) * (pfX >> 8);
          if (SaveFeature(BLFeat,
                          NumBLFeatures,
                          (inT16) (pfX >> 8),
                          (inT16) ((pfY >> 8) - 128),
                          Theta) == FALSE)
            return FALSE;
          NumBLFeatures++;
          for (i = 1; i < n; i++) {
            pfX += dX;
            pfY += dY;
            Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean);
            Iy += (pfX >> 8) * (pfX >> 8);
            if (SaveFeature(BLFeat,
                            NumBLFeatures,
                            (inT16) (pfX >> 8),
                            (inT16) ((pfY >> 8) - 128),
                            Theta) == FALSE)
              return FALSE;
            NumBLFeatures++;
          }
        }
      }
      if (n != 0) {              /* Throw away a point that is too close */
        LastX = NormX;
        LastY = NormY;
      }
    }
    while (Loop != LoopStart);
    OutLine = OutLine->next;
  }
  Results->Width = max_x - min_x;
  // MySqrt2() is handed the moments as divisors, so keep them non-zero.
  if (Ix == 0)
    Ix = 1;
  if (Iy == 0)
    Iy = 1;
  RxInv = MySqrt2 (NumBLFeatures, Ix, &RxExp);
  RyInv = MySqrt2 (NumBLFeatures, Iy, &RyExp);
  ClipRadius(&RxInv, &RxExp, &RyInv, &RyExp);

  Results->Rx = (inT16) (51.2 / (double) RxInv * pow (2.0, (double) RxExp));
  Results->Ry = (inT16) (51.2 / (double) RyInv * pow (2.0, (double) RyExp));
  if (Results->Ry == 0) {
    /*
        This would result in features having 'nan' values.
        Since the expression is always > 0, assign a value of 1.
    */
    Results->Ry = 1;
  }
  Results->NumBL = NumBLFeatures;

  // Extract character normalized features
  //
  //   Rescale the co-ordinates to "equalize" distribution in X and Y, making
  //   all of the following unichars be sized to look similar:  , ' 1 i
  //
  //   We calculate co-ordinates relative to the centroid, and then scale them
  //   as follows (accomplishing a scale of up to 102.4 / dimension):
  //     y *= 51.2 / Rx    [ y scaled by 0.0 ... 102.4 / height of glyph  ]
  //     x *= 51.2 / Ry    [ x scaled by 0.0 ... 102.4 / width of glyph ]
  //   Although tempting to think so, this does not guarantee that our range
  //   is within [-102.4...102.4] x [-102.4...102.4] because (Xmean, Ymean)
  //   is the centroid, not the center of the bounding box.  Instead, we can
  //   only bound the result to [-204 ... 204] x [-204 ... 204]
  //
  NumCNFeatures = 0;
  OutLine = Blob->outlines;
  int OutLineIndex = -1;
  while (OutLine != NULL) {
    LoopStart = OutLine->loop;
    Loop = LoopStart;
    /* Check for bad loops -- must happen before Loop is dereferenced */
    if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart))
      return FALSE;
    LastX = (Loop->pos.x - Xmean) * RyInv;
    LastY = (Loop->pos.y - Ymean) * RxInv;
    LastX >>= (inT8) RyExp;
    LastY >>= (inT8) RxExp;
    OutLineIndex++;
    do {
      Segment = Loop;
      Loop = Loop->next;
      NormX = (Loop->pos.x - Xmean) * RyInv;
      NormY = (Loop->pos.y - Ymean) * RxInv;
      NormX >>= (inT8) RyExp;
      NormY >>= (inT8) RxExp;

      n = 1;
      if (!Segment->IsHidden()) {
        DeltaX = NormX - LastX;
        DeltaY = NormY - LastY;
        Length = MySqrt(DeltaX, DeltaY);
        n = ((Length << 2) + Length + 32) >> 6;
        if (n != 0) {
          Theta = BinaryAnglePlusPi(DeltaY, DeltaX);
          dX = (DeltaX << 8) / n;
          dY = (DeltaY << 8) / n;
          pfX = (LastX << 8) + (dX >> 1);
          pfY = (LastY << 8) + (dY >> 1);
          if (SaveFeature(CNFeat,
                          NumCNFeatures,
                          (inT16) (pfX >> 8),
                          (inT16) (pfY >> 8),
                          Theta) == FALSE)
            return FALSE;
          if (FeatureOutlineArray) {
            FeatureOutlineArray[NumCNFeatures] = OutLineIndex;
          }
          NumCNFeatures++;
          for (i = 1; i < n; i++) {
            pfX += dX;
            pfY += dY;
            if (SaveFeature(CNFeat,
                            NumCNFeatures,
                            (inT16) (pfX >> 8),
                            (inT16) (pfY >> 8),
                            Theta) == FALSE)
              return FALSE;
            if (FeatureOutlineArray) {
              FeatureOutlineArray[NumCNFeatures] = OutLineIndex;
            }
            NumCNFeatures++;
          }
        }
      }
      if (n != 0) {              /* Throw away a point that is too close */
        LastX = NormX;
        LastY = NormY;
      }
    }
    while (Loop != LoopStart);
    OutLine = OutLine->next;
  }

  Results->NumCN = NumCNFeatures;
  return TRUE;
}
| tesseract::TrainingSample* GetIntFeatures | ( | tesseract::NormalizationMode | mode, |
| TBLOB * | blob, | ||
| const DENORM & | denorm | ||
| ) |
Definition at line 97 of file intfx.cpp.
{
  // Runs ExtractIntFeat() on the blob and wraps the feature set selected by
  // mode in a TrainingSample.  The return value of ExtractIntFeat() is not
  // checked; the feature counts it leaves in fx_info decide whether a sample
  // is built.  Returns NULL when the selected feature set is empty.
  INT_FEATURE_ARRAY blfeatures;
  INT_FEATURE_ARRAY cnfeatures;
  INT_FX_RESULT_STRUCT fx_info;
  ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &fx_info, NULL);

  TrainingSample* sample = NULL;
  switch (mode) {
    case tesseract::NM_CHAR_ANISOTROPIC: {
      // Character-normalized features.
      const int cn_count = fx_info.NumCN;
      if (cn_count > 0) {
        sample = TrainingSample::CopyFromFeatures(fx_info, cnfeatures,
                                                  cn_count);
      }
      break;
    }
    case tesseract::NM_BASELINE: {
      // Baseline-normalized features.
      const int bl_count = fx_info.NumBL;
      if (bl_count > 0) {
        sample = TrainingSample::CopyFromFeatures(fx_info, blfeatures,
                                                  bl_count);
      }
      break;
    }
    default:
      ASSERT_HOST(!"Unsupported normalization mode!");
      break;
  }
  return sample;
}
| void InitIntegerFX | ( | ) |
Public Function Prototypes
Public Code
Definition at line 74 of file intfx.cpp.
{
  // Fills AtanTable, cos_table and sin_table the first time it is called;
  // later calls only take and release atan_table_mutex.
  static bool atan_table_init = false;

  atan_table_mutex.Lock();
  if (atan_table_init) {
    // Tables were filled by an earlier call.
    atan_table_mutex.Unlock();
    return;
  }

  int entry;
  // Arctangent of entry / ATAN_TABLE_SIZE, rescaled so that PI radians maps
  // to 128 and rounded to the nearest integer.
  for (entry = 0; entry < ATAN_TABLE_SIZE; entry++) {
    AtanTable[entry] =
        (uinT8) (atan ((entry / (float) ATAN_TABLE_SIZE)) * 128.0 / PI + 0.5);
  }
  // Cosine and sine of INT_CHAR_NORM_RANGE evenly spaced angles covering a
  // full turn, each offset by PI.
  for (entry = 0; entry < INT_CHAR_NORM_RANGE; ++entry) {
    cos_table[entry] = cos(entry * 2 * PI / INT_CHAR_NORM_RANGE + PI);
    sin_table[entry] = sin(entry * 2 * PI / INT_CHAR_NORM_RANGE + PI);
  }

  atan_table_init = true;
  atan_table_mutex.Unlock();
}
Definition at line 505 of file intfx.cpp.
{
  // Integer square root of X*X + Y*Y, computed on the magnitudes of X and Y
  // after clamping each of them to EvidenceMultMask.
  const uinT32 EvidenceMultMask =
      ((1 << IntegerMatcher::kIntEvidenceTruncBits) - 1);

  if (X < 0)
    X = -X;
  if (Y < 0)
    Y = -Y;

  if (X > EvidenceMultMask)
    X = EvidenceMultMask;
  if (Y > EvidenceMultMask)
    Y = EvidenceMultMask;

  const uinT32 Sum = X * X + Y * Y;

  // Build the root one bit at a time from the highest candidate bit down,
  // keeping a bit whenever the square of the trial value still fits in Sum.
  uinT16 SqRoot = 0;
  for (uinT16 Bit = (EvidenceMultMask + 1) << 1; Bit != 0; Bit >>= 1) {
    const uinT16 Trial = SqRoot | Bit;
    const uinT32 Square = Trial * Trial;
    if (Square <= Sum)
      SqRoot = Trial;
  }
  return SqRoot;
}
Definition at line 542 of file intfx.cpp.
{
  // Computes an 8-bit square root of the 16-bit quotient formed from the
  // normalized values of N * 41943 and I, and reports through *Exp the
  // exponent k tracked during normalization.  When k ends up negative the
  // result is reported as 255 with *Exp = 0.
  // NOTE(review): 41943 is close to 51.2^2 * 16, which presumably ties in
  // with the 51.2 scale used by ExtractIntFeat -- confirm.
  //
  // Precondition: N and I must both be non-zero; with a zero operand the
  // corresponding normalization loop below would never terminate.
  assert((N != 0) && (I != 0));

  inT8 k;
  uinT32 N2;
  uinT8 SqRoot;
  uinT16 Square;
  uinT8 BitLocation;
  uinT16 Ratio;

  // Multiply in unsigned 32-bit arithmetic: N is promoted to (signed) int
  // otherwise and the product overflows int once N exceeds 51199.
  N2 = static_cast<uinT32>(N) * 41943;

  // Shift N2 and I left two bits at a time until one of their top two bits
  // is set, tracking the net shift in k.
  k = 9;
  while ((N2 & 0xc0000000) == 0) {
    N2 <<= 2;
    k += 1;
  }

  while ((I & 0xc0000000) == 0) {
    I <<= 2;
    k -= 1;
  }

  // If neither value reaches the top bit, shift both by one more bit.
  if (((N2 & 0x80000000) == 0) && ((I & 0x80000000) == 0)) {
    N2 <<= 1;
    I <<= 1;
  }

  // Keep the upper 16 bits of N2 and drop the low 14 bits of I before
  // dividing.
  N2 &= 0xffff0000;
  I >>= 14;
  Ratio = N2 / I;

  // Bit-by-bit integer square root of Ratio, from bit 7 downwards.
  BitLocation = 128;
  SqRoot = 0;
  do {
    Square = (SqRoot | BitLocation) * (SqRoot | BitLocation);
    if (Square <= Ratio)
      SqRoot |= BitLocation;
    BitLocation >>= 1;
  }
  while (BitLocation);

  if (k < 0) {
    *Exp = 0;
    return 255;
  }
  else {
    *Exp = k;
    return SqRoot;
  }
}
| int SaveFeature | ( | INT_FEATURE_ARRAY | FeatureArray, |
| uinT16 | FeatureNum, | ||
| inT16 | X, | ||
| inT16 | Y, | ||
| uinT8 | Theta | ||
| ) |
Definition at line 478 of file intfx.cpp.
{
  // Writes one feature into slot FeatureNum of FeatureArray.
  // Returns FALSE without writing when FeatureNum is not below
  // MAX_NUM_INT_FEATURES, TRUE otherwise.
  if (FeatureNum >= MAX_NUM_INT_FEATURES) {
    return FALSE;
  }

  // Offset both coordinates by 128; the sums are kept in inT16 like the
  // parameters they come from.
  const inT16 ShiftedX = X + 128;
  const inT16 ShiftedY = Y + 128;

  INT_FEATURE Feature = &(FeatureArray[FeatureNum]);
  // Stored coordinates are limited to the range 0..255.
  Feature->X = ClipToRange<inT16>(ShiftedX, 0, 255);
  Feature->Y = ClipToRange<inT16>(ShiftedY, 0, 255);
  Feature->Theta = Theta;
  Feature->CP_misses = 0;
  return TRUE;
}