|
Tesseract
3.02
|
#include <statistc.h>
Public Member Functions | |
| STATS (inT32 min_bucket_value, inT32 max_bucket_value_plus_1) | |
| STATS () | |
| ~STATS () | |
| bool | set_range (inT32 min_bucket_value, inT32 max_bucket_value_plus_1) |
| void | clear () |
| void | add (inT32 value, inT32 count) |
| inT32 | mode () const |
| double | mean () const |
| double | sd () const |
| double | ile (double frac) const |
| inT32 | min_bucket () const |
| inT32 | max_bucket () const |
| double | median () const |
| inT32 | pile_count (inT32 value) const |
| inT32 | get_total () const |
| bool | local_min (inT32 x) const |
| void | smooth (inT32 factor) |
| inT32 | cluster (float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters) |
| void | print () const |
| void | print_summary () const |
| void | plot (ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const |
| void | plotline (ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const |
Definition at line 29 of file statistc.h.
| STATS::STATS | ( | inT32 | min_bucket_value, |
| inT32 | max_bucket_value_plus_1 | ||
| ) |
Definition at line 39 of file statistc.cpp.
| STATS::STATS | ( | ) |
Definition at line 50 of file statistc.cpp.
{
  // Default construction: an empty range with no bucket storage.
  // set_range() must be called before any data can be recorded.
  rangemax_ = 0;
  rangemin_ = 0;
  buckets_ = NULL;
  // Zero the running total as well. get_total() returns total_count_
  // without looking at buckets_, so it must not be left indeterminate.
  total_count_ = 0;
}
| STATS::~STATS | ( | ) |
Definition at line 91 of file statistc.cpp.
| void STATS::add | ( | inT32 | value, |
| inT32 | count | ||
| ) |
Definition at line 103 of file statistc.cpp.
{
  // Adds count to the pile for value. Values outside the range are clamped
  // onto the nearest end bucket; without bucket storage nothing is recorded.
  if (buckets_ != NULL) {
    const inT32 clipped = ClipToRange(value, rangemin_, rangemax_ - 1);
    buckets_[clipped - rangemin_] += count;
    total_count_ += count;  // maintain the running total
  }
}
| void STATS::clear | ( | ) |
Definition at line 80 of file statistc.cpp.
{
  // Reset the running total, then zero every bucket if storage exists.
  total_count_ = 0;
  if (buckets_ == NULL)
    return;
  const inT32 bucket_count = rangemax_ - rangemin_;
  memset(buckets_, 0, bucket_count * sizeof(buckets_[0]));
}
| inT32 STATS::cluster | ( | float | lower, |
| float | upper, | ||
| float | multiple, | ||
| inT32 | max_clusters, | ||
| STATS * | clusters | ||
| ) |
Definition at line 323 of file statistc.cpp.
{                                // array of clusters
  // Partitions the piles of this histogram into at most max_clusters
  // clusters.
  //
  // clusters is indexed from 0 to max_clusters inclusive: clusters[0]
  // accumulates every count that has been assigned to any cluster so far,
  // and clusters[1..] hold the individual clusters. Entries that already
  // have buckets and a non-zero total on entry are kept and extended.
  //
  // lower    - a pile closer than this to a cluster centre is absorbed into
  //            that cluster while the pile heights keep falling away.
  // upper    - an unassigned pile must be further than this from every
  //            existing centre to seed a new cluster.
  // multiple - the pile must also lie outside [centre / multiple,
  //            centre * multiple] of its nearest centre to seed a cluster.
  //
  // Returns the number of clusters in use, or 0 if there is no data,
  // max_clusters < 1, or a new cluster's range could not be set.
  BOOL8 new_cluster;             // added one
  float *centres;                // cluster centres
  inT32 entry;                   // bucket index
  inT32 cluster;                 // cluster index
  inT32 best_cluster;            // one to assign to
  inT32 new_centre = 0;          // residual mode
  inT32 new_mode;                // pile count of new_centre
  inT32 count;                   // pile to place
  float dist;                    // from cluster
  float min_dist;                // from best_cluster
  inT32 cluster_count;           // no of clusters

  if (buckets_ == NULL || max_clusters < 1)
    return 0;
  centres = new float[max_clusters + 1];

  // Pick up any clusters supplied by the caller and extend each one
  // outwards from its mode with whatever is still unassigned.
  for (cluster_count = 1; cluster_count <= max_clusters
       && clusters[cluster_count].buckets_ != NULL
       && clusters[cluster_count].total_count_ > 0;
       cluster_count++) {
    centres[cluster_count] =
      static_cast<float>(clusters[cluster_count].ile(0.5));
    new_centre = clusters[cluster_count].mode();
    // Walk down from the mode while within lower of the centre and the
    // piles are not rising again.
    for (entry = new_centre - 1; centres[cluster_count] - entry < lower
         && entry >= rangemin_
         && pile_count(entry) <= pile_count(entry + 1);
         entry--) {
      count = pile_count(entry) - clusters[0].pile_count(entry);
      if (count > 0) {
        clusters[cluster_count].add(entry, count);
        clusters[0].add (entry, count);
      }
    }
    // Same walk upwards from the mode.
    for (entry = new_centre + 1; entry - centres[cluster_count] < lower
         && entry < rangemax_
         && pile_count(entry) <= pile_count(entry - 1);
         entry++) {
      count = pile_count(entry) - clusters[0].pile_count(entry);
      if (count > 0) {
        clusters[cluster_count].add(entry, count);
        clusters[0].add(entry, count);
      }
    }
  }
  cluster_count--;

  if (cluster_count == 0) {
    // No usable clusters were supplied: start the accumulator from scratch.
    clusters[0].set_range(rangemin_, rangemax_);
  }
  do {
    new_cluster = FALSE;
    new_mode = 0;
    // Find the largest unassigned pile that is far enough from every
    // existing centre to seed a new cluster.
    for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
      count = buckets_[entry] - clusters[0].buckets_[entry];
      //remaining pile
      if (count > 0) {           //any to handle
        min_dist = static_cast<float>(MAX_INT32);
        best_cluster = 0;
        for (cluster = 1; cluster <= cluster_count; cluster++) {
          dist = entry + rangemin_ - centres[cluster];
          //find distance
          if (dist < 0)
            dist = -dist;
          if (dist < min_dist) {
            min_dist = dist;     //find least
            best_cluster = cluster;
          }
        }
        if (min_dist > upper     //far enough for new
            && (best_cluster == 0
                || entry + rangemin_ > centres[best_cluster] * multiple
                || entry + rangemin_ < centres[best_cluster] / multiple)) {
          if (count > new_mode) {
            new_mode = count;
            new_centre = entry + rangemin_;
          }
        }
      }
    }
    // need new and room
    if (new_mode > 0 && cluster_count < max_clusters) {
      cluster_count++;
      new_cluster = TRUE;
      if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
        delete [] centres;       // release the centres array on failure
        return 0;
      }
      centres[cluster_count] = static_cast<float>(new_centre);
      clusters[cluster_count].add(new_centre, new_mode);
      clusters[0].add(new_centre, new_mode);
      // Grow the new cluster downwards from its seed pile.
      for (entry = new_centre - 1; centres[cluster_count] - entry < lower
           && entry >= rangemin_
           && pile_count (entry) <= pile_count(entry + 1); entry--) {
        count = pile_count(entry) - clusters[0].pile_count(entry);
        if (count > 0) {
          clusters[cluster_count].add(entry, count);
          clusters[0].add(entry, count);
        }
      }
      // Grow the new cluster upwards from its seed pile.
      for (entry = new_centre + 1; entry - centres[cluster_count] < lower
           && entry < rangemax_
           && pile_count (entry) <= pile_count(entry - 1); entry++) {
        count = pile_count(entry) - clusters[0].pile_count(entry);
        if (count > 0) {
          clusters[cluster_count].add(entry, count);
          clusters[0].add (entry, count);
        }
      }
      // Re-centre on the median of what was actually collected.
      centres[cluster_count] =
        static_cast<float>(clusters[cluster_count].ile(0.5));
    }
  } while (new_cluster && cluster_count < max_clusters);
  delete [] centres;
  return cluster_count;
}
| inT32 STATS::get_total | ( | ) | const [inline] |
Definition at line 82 of file statistc.h.
{
  // Sum of the counts over all piles.
  const inT32 total = total_count_;
  return total;
}
| double STATS::ile | ( | double | frac | ) | const |
Definition at line 176 of file statistc.cpp.
{
// Returns the value below which the fraction frac of the samples lies,
// interpolating linearly inside the bucket where the running sum of counts
// reaches the target. With no buckets or no samples the result is rangemin_.
if (buckets_ == NULL || total_count_ == 0) {
return static_cast<double>(rangemin_);
}
#if 0
// TODO(rays) The existing code doesn't seem to be doing the right thing
// with target a double but this substitute crashes the code that uses it.
// Investigate and fix properly.
int target = IntCastRounded(frac * total_count_);
target = ClipToRange(target, 1, total_count_);
#else
// Target cumulative count, clipped to the interval [1, total_count_].
double target = frac * total_count_;
target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
#endif
int sum = 0;
int index = 0;
// Empty-bodied loop: add buckets until the running sum reaches the target.
// On exit index is one past the last bucket that was added.
for (index = 0; index < rangemax_ - rangemin_ && sum < target;
sum += buckets_[index++]);
if (index > 0) {
// The last bucket added is used as a divisor below, so it must be non-empty.
ASSERT_HOST(buckets_[index - 1] > 0);
// Step back from the upper edge of that bucket by the share of its count
// that overshot the target.
return rangemin_ + index -
static_cast<double>(sum - target) / buckets_[index - 1];
} else {
return static_cast<double>(rangemin_);
}
}
| bool STATS::local_min | ( | inT32 | x | ) | const |
Definition at line 265 of file statistc.cpp.
{
  // Reports whether the pile at x (clamped into range) is a local minimum:
  // true when it is empty, or when the first pile of differing height on
  // each side, if there is one, is not lower.
  if (buckets_ == NULL)
    return false;
  const inT32 bucket_count = rangemax_ - rangemin_;
  const inT32 pos = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
  const inT32 height = buckets_[pos];
  if (height == 0)
    return true;

  // Skip the plateau of equal piles to the left.
  inT32 left = pos - 1;
  while (left >= 0 && buckets_[left] == height)
    --left;
  if (left >= 0 && buckets_[left] < height)
    return false;

  // Skip the plateau of equal piles to the right.
  inT32 right = pos + 1;
  while (right < bucket_count && buckets_[right] == height)
    ++right;
  return !(right < bucket_count && buckets_[right] < height);
}
| inT32 STATS::max_bucket | ( | ) | const |
Definition at line 224 of file statistc.cpp.
| double STATS::mean | ( | ) | const |
Definition at line 137 of file statistc.cpp.
{
  // Mean of the samples; rangemin_ when there is no data.
  if (buckets_ == NULL || total_count_ <= 0)
    return static_cast<double>(rangemin_);
  // Accumulate offset * count in 64 bits, then shift the result by rangemin_.
  inT64 weighted_sum = 0;
  for (int offset = rangemax_ - rangemin_; offset-- > 0; )
    weighted_sum += static_cast<inT64>(offset) * buckets_[offset];
  return static_cast<double>(weighted_sum) / total_count_ + rangemin_;
}
| double STATS::median | ( | ) | const |
Definition at line 242 of file statistc.cpp.
{
  // Median of the samples; rangemin_ when no buckets are allocated.
  if (buckets_ == NULL)
    return static_cast<double>(rangemin_);
  double result = ile(0.5);
  const int centre_pile = static_cast<int>(floor(result));
  if (total_count_ > 1 && pile_count(centre_pile) == 0) {
    // The interpolated value landed on an empty pile: use the midpoint of
    // the nearest non-empty piles on either side instead.
    inT32 below = centre_pile;
    while (pile_count(below) == 0)
      --below;  // preceding non-zero pile
    inT32 above = centre_pile;
    while (pile_count(above) == 0)
      ++above;  // following non-zero pile
    result = (below + above) / 2.0;
  }
  return result;
}
| inT32 STATS::min_bucket | ( | ) | const |
Definition at line 208 of file statistc.cpp.
| inT32 STATS::mode | ( | ) | const |
Definition at line 117 of file statistc.cpp.
{
  // Value of the fullest pile; rangemin_ when no buckets are allocated.
  if (buckets_ == NULL)
    return rangemin_;
  // Bucket 0 is the initial candidate. The remaining buckets are scanned
  // from the top down and only a strictly larger count replaces the best.
  inT32 best_offset = 0;
  inT32 best_count = buckets_[0];
  int offset = rangemax_ - rangemin_ - 1;
  while (offset > 0) {
    if (buckets_[offset] > best_count) {
      best_count = buckets_[offset];
      best_offset = offset;
    }
    --offset;
  }
  return best_offset + rangemin_;
}
| inT32 STATS::pile_count | ( | inT32 | value | ) | const [inline] |
Definition at line 74 of file statistc.h.
{
  // Count held in the pile for value. Values at or beyond either end of
  // the range read the corresponding end bucket.
  inT32 offset;
  if (value <= rangemin_) {
    offset = 0;
  } else if (value >= rangemax_ - 1) {
    offset = rangemax_ - rangemin_ - 1;
  } else {
    offset = value - rangemin_;
  }
  return buckets_[offset];
}
| void STATS::plot | ( | ScrollView * | window, |
| float | xorigin, | ||
| float | yorigin, | ||
| float | xscale, | ||
| float | yscale, | ||
| ScrollView::Color | colour | ||
| ) | const |
Definition at line 497 of file statistc.cpp.
| void STATS::plotline | ( | ScrollView * | window, |
| float | xorigin, | ||
| float | yorigin, | ||
| float | xscale, | ||
| float | yscale, | ||
| ScrollView::Color | colour | ||
| ) | const |
Definition at line 524 of file statistc.cpp.
| void STATS::print | ( | ) | const |
Definition at line 446 of file statistc.cpp.
{
  // Prints every non-empty pile as "value:count", eight to a line, and
  // then the summary statistics. Does nothing without bucket storage.
  if (buckets_ == NULL)
    return;
  const inT32 first = min_bucket() - rangemin_;
  const inT32 last = max_bucket() - rangemin_;
  int printed = 0;
  for (int offset = first; offset <= last; ++offset) {
    if (buckets_[offset] == 0)
      continue;
    tprintf("%4d:%-3d ", rangemin_ + offset, buckets_[offset]);
    ++printed;
    if (printed % 8 == 0)
      tprintf ("\n");
  }
  tprintf ("\n");
  print_summary();
}
| void STATS::print_summary | ( | ) | const |
Definition at line 472 of file statistc.cpp.
{
// Prints the summary statistics of the histogram, one per line, via tprintf.
// Does nothing when no buckets are allocated.
if (buckets_ == NULL) {
return;
}
inT32 min = min_bucket();
inT32 max = max_bucket();
tprintf("Total count=%d\n", total_count_);
// "Really" shows the min_bucket()/max_bucket() result next to the
// interpolated ile(0.0)/ile(1.0) value.
tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
tprintf("Lower quartile=%.2f\n", ile(0.25));
// median() and ile(0.5) are both printed because they can differ.
tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
tprintf("Upper quartile=%.2f\n", ile(0.75));
tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
// Inclusive span between the two values obtained above.
tprintf("Range=%d\n", max + 1 - min);
tprintf("Mean= %.2f\n", mean());
tprintf("SD= %.2f\n", sd());
}
| double STATS::sd | ( | ) | const |
Definition at line 153 of file statistc.cpp.
{
  // Standard deviation of the samples; 0.0 when there is no data or the
  // computed variance is not positive.
  if (buckets_ == NULL || total_count_ <= 0)
    return 0.0;
  inT64 offset_sum = 0;
  double square_sum = 0.0;
  for (int offset = rangemax_ - rangemin_; offset-- > 0; ) {
    offset_sum += static_cast<inT64>(offset) * buckets_[offset];
    square_sum += static_cast<double>(offset) * offset * buckets_[offset];
  }
  // variance = E[x^2] - E[x]^2, computed on bucket offsets (the shift by
  // rangemin_ does not affect the spread).
  const double mean_offset = static_cast<double>(offset_sum) / total_count_;
  const double variance =
      square_sum / total_count_ - mean_offset * mean_offset;
  return variance > 0.0 ? sqrt(variance) : 0.0;
}
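| bool STATS::set_range | ( | inT32 | min_bucket_value, |
| inT32 | max_bucket_value_plus_1 | ||
| ) |
Definition at line 61 of file statistc.cpp.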
{
  // Sets the bucket range to [min_bucket_value, max_bucket_value_plus_1)
  // and clears all counts. Returns false, changing nothing, when the range
  // is empty or inverted.
  if (max_bucket_value_plus_1 <= min_bucket_value)
    return false;
  const inT32 new_size = max_bucket_value_plus_1 - min_bucket_value;
  // Reallocate only when the number of buckets actually changes.
  if (rangemax_ - rangemin_ != new_size) {
    delete [] buckets_;
    buckets_ = new inT32[new_size];
  }
  rangemin_ = min_bucket_value;
  rangemax_ = max_bucket_value_plus_1;
  clear();
  return true;
}
| void STATS::smooth | ( | inT32 | factor | ) |
Definition at line 292 of file statistc.cpp.
{
  // Replaces each pile with a triangularly weighted sum of itself and its
  // neighbours: weight factor at the centre, falling by one per step of
  // distance. Does nothing for factor < 2 or without bucket storage.
  if (buckets_ == NULL || factor < 2)
    return;
  STATS smoothed(rangemin_, rangemax_);
  const int bucket_count = rangemax_ - rangemin_;
  for (int centre = 0; centre < bucket_count; ++centre) {
    int weighted = buckets_[centre] * factor;  // centre weight
    for (int step = 1; step < factor; ++step) {
      const int weight = factor - step;
      const int left = centre - step;
      const int right = centre + step;
      if (left >= 0)
        weighted += buckets_[left] * weight;
      if (right < bucket_count)
        weighted += buckets_[right] * weight;
    }
    smoothed.add(centre + rangemin_, weighted);
  }
  // Copy the smoothed histogram back over this one.
  total_count_ = smoothed.total_count_;
  memcpy(buckets_, smoothed.buckets_, bucket_count * sizeof(buckets_[0]));
}