Tesseract
3.02
|
#include <statistc.h>
Public Member Functions | |
STATS (inT32 min_bucket_value, inT32 max_bucket_value_plus_1) | |
STATS () | |
~STATS () | |
bool | set_range (inT32 min_bucket_value, inT32 max_bucket_value_plus_1) |
void | clear () |
void | add (inT32 value, inT32 count) |
inT32 | mode () const |
double | mean () const |
double | sd () const |
double | ile (double frac) const |
inT32 | min_bucket () const |
inT32 | max_bucket () const |
double | median () const |
inT32 | pile_count (inT32 value) const |
inT32 | get_total () const |
bool | local_min (inT32 x) const |
void | smooth (inT32 factor) |
inT32 | cluster (float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters) |
void | print () const |
void | print_summary () const |
void | plot (ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const |
void | plotline (ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const |
Definition at line 29 of file statistc.h.
Definition at line 39 of file statistc.cpp.
STATS::STATS | ( | ) |
Definition at line 50 of file statistc.cpp.
{
  // Default construction: an empty histogram with no range and no bucket
  // storage. set_range() allocates the buckets later.
  rangemax_ = 0;
  rangemin_ = 0;
  buckets_ = NULL;
  // Zero the running total too. get_total() returns total_count_ without
  // checking buckets_, so leaving it unset would expose an indeterminate
  // value on an object that never had set_range()/clear() called.
  total_count_ = 0;
}
STATS::~STATS | ( | ) |
Definition at line 91 of file statistc.cpp.
Definition at line 103 of file statistc.cpp.
{
  // Nothing can be recorded until bucket storage exists.
  if (buckets_ != NULL) {
    // Values outside [rangemin_, rangemax_ - 1] are folded into the
    // nearest end bucket.
    const inT32 clipped = ClipToRange(value, rangemin_, rangemax_ - 1);
    const inT32 slot = clipped - rangemin_;
    buckets_[slot] += count;
    // Maintain the running total alongside the per-bucket counts.
    total_count_ += count;
  }
}
void STATS::clear | ( | ) |
Definition at line 80 of file statistc.cpp.
{
  // Reset the running total first; it is cleared even without storage.
  total_count_ = 0;
  if (buckets_ == NULL) {
    return;
  }
  // Zero every bucket in the current range in one call.
  memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
}
inT32 STATS::cluster | ( | float | lower, |
float | upper, | ||
float | multiple, | ||
inT32 | max_clusters, | ||
STATS * | clusters | ||
) |
Definition at line 323 of file statistc.cpp.
{
  // Splits the samples of this histogram into at most max_clusters clusters.
  //
  // lower        - a cluster is grown outwards from its centre while the
  //                distance to the centre stays below this value and the
  //                pile counts do not increase.
  // upper        - a pile may seed a new cluster only if it is further than
  //                this from the nearest existing centre.
  // multiple     - in addition, the pile's value must lie outside
  //                [centre / multiple, centre * multiple] of that centre.
  // max_clusters - maximum number of clusters to produce.
  // clusters     - array indexed 0..max_clusters. clusters[0] accumulates
  //                every sample assigned so far; clusters[1..] hold the
  //                individual clusters. Entries that already have buckets
  //                and a positive total are treated as existing clusters.
  //
  // Returns the number of clusters, or 0 if this histogram has no buckets,
  // max_clusters < 1, or a new cluster could not be given a range.
  BOOL8 new_cluster;     // set when the last pass added a cluster
  float *centres;        // cluster centres, indexed like clusters
  inT32 entry;           // bucket index or bucket value
  inT32 cluster;         // cluster index
  inT32 best_cluster;    // nearest existing cluster
  inT32 new_centre = 0;  // value of the residual mode
  inT32 new_mode;        // pile count at new_centre
  inT32 count;           // samples in a pile not yet assigned
  float dist;            // distance from a cluster centre
  float min_dist;        // distance from best_cluster
  inT32 cluster_count;   // number of clusters

  if (buckets_ == NULL || max_clusters < 1)
    return 0;
  centres = new float[max_clusters + 1];

  // Pick up any clusters the caller supplied and extend each of them over
  // neighbouring piles that are still unassigned.
  for (cluster_count = 1;
       cluster_count <= max_clusters &&
           clusters[cluster_count].buckets_ != NULL &&
           clusters[cluster_count].total_count_ > 0;
       cluster_count++) {
    centres[cluster_count] =
        static_cast<float>(clusters[cluster_count].ile(0.5));
    new_centre = clusters[cluster_count].mode();
    // Grow downwards from the mode.
    for (entry = new_centre - 1;
         centres[cluster_count] - entry < lower && entry >= rangemin_ &&
             pile_count(entry) <= pile_count(entry + 1);
         entry--) {
      count = pile_count(entry) - clusters[0].pile_count(entry);
      if (count > 0) {
        clusters[cluster_count].add(entry, count);
        clusters[0].add(entry, count);
      }
    }
    // Grow upwards from the mode.
    for (entry = new_centre + 1;
         entry - centres[cluster_count] < lower && entry < rangemax_ &&
             pile_count(entry) <= pile_count(entry - 1);
         entry++) {
      count = pile_count(entry) - clusters[0].pile_count(entry);
      if (count > 0) {
        clusters[cluster_count].add(entry, count);
        clusters[0].add(entry, count);
      }
    }
  }
  cluster_count--;

  if (cluster_count == 0) {
    // No pre-existing clusters: start the "assigned" accumulator afresh.
    clusters[0].set_range(rangemin_, rangemax_);
  }
  do {
    new_cluster = FALSE;
    new_mode = 0;
    // Find the largest unassigned pile that is far enough from every
    // existing centre to start a cluster of its own.
    for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
      count = buckets_[entry] - clusters[0].buckets_[entry];  // remaining pile
      if (count > 0) {  // any to handle
        min_dist = static_cast<float>(MAX_INT32);
        best_cluster = 0;
        for (cluster = 1; cluster <= cluster_count; cluster++) {
          dist = entry + rangemin_ - centres[cluster];  // find distance
          if (dist < 0)
            dist = -dist;
          if (dist < min_dist) {
            min_dist = dist;  // find least
            best_cluster = cluster;
          }
        }
        if (min_dist > upper  // far enough for new
            && (best_cluster == 0 ||
                entry + rangemin_ > centres[best_cluster] * multiple ||
                entry + rangemin_ < centres[best_cluster] / multiple)) {
          if (count > new_mode) {
            new_mode = count;
            new_centre = entry + rangemin_;
          }
        }
      }
    }
    // need new and room
    if (new_mode > 0 && cluster_count < max_clusters) {
      cluster_count++;
      new_cluster = TRUE;
      if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
        // Release the centres array before bailing out; the early return
        // previously leaked it.
        delete [] centres;
        return 0;
      }
      centres[cluster_count] = static_cast<float>(new_centre);
      clusters[cluster_count].add(new_centre, new_mode);
      clusters[0].add(new_centre, new_mode);
      // Grow the new cluster downwards from its seed pile.
      for (entry = new_centre - 1;
           centres[cluster_count] - entry < lower && entry >= rangemin_ &&
               pile_count(entry) <= pile_count(entry + 1);
           entry--) {
        count = pile_count(entry) - clusters[0].pile_count(entry);
        if (count > 0) {
          clusters[cluster_count].add(entry, count);
          clusters[0].add(entry, count);
        }
      }
      // Grow the new cluster upwards from its seed pile.
      for (entry = new_centre + 1;
           entry - centres[cluster_count] < lower && entry < rangemax_ &&
               pile_count(entry) <= pile_count(entry - 1);
           entry++) {
        count = pile_count(entry) - clusters[0].pile_count(entry);
        if (count > 0) {
          clusters[cluster_count].add(entry, count);
          clusters[0].add(entry, count);
        }
      }
      // Re-centre on the median of everything the cluster absorbed.
      centres[cluster_count] =
          static_cast<float>(clusters[cluster_count].ile(0.5));
    }
  } while (new_cluster && cluster_count < max_clusters);
  delete [] centres;
  return cluster_count;
}
inT32 STATS::get_total | ( | ) | const [inline] |
Definition at line 82 of file statistc.h.
{
  // Running total of the counts across all piles.
  return total_count_;
}
double STATS::ile | ( | double | frac | ) | const |
Definition at line 176 of file statistc.cpp.
{
  // Without storage or samples there is nothing to interpolate over.
  if (buckets_ == NULL || total_count_ == 0) {
    return static_cast<double>(rangemin_);
  }
#if 0
  // TODO(rays) The existing code doesn't seem to be doing the right thing
  // with target a double but this substitute crashes the code that uses it.
  // Investigate and fix properly.
  int target = IntCastRounded(frac * total_count_);
  target = ClipToRange(target, 1, total_count_);
#else
  // Sample count the cumulative sum has to reach, kept within
  // [1, total_count_].
  double target = frac * total_count_;
  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
#endif
  const int bucket_count = rangemax_ - rangemin_;
  int running = 0;  // cumulative count of the buckets consumed so far
  int pos = 0;      // one past the last bucket consumed
  while (pos < bucket_count && running < target) {
    running += buckets_[pos];
    ++pos;
  }
  if (pos <= 0) {
    return static_cast<double>(rangemin_);
  }
  ASSERT_HOST(buckets_[pos - 1] > 0);
  // Step back inside the last bucket by the fraction of it that overshot
  // the target.
  return rangemin_ + pos -
         static_cast<double>(running - target) / buckets_[pos - 1];
}
bool STATS::local_min | ( | inT32 | x | ) | const |
Definition at line 265 of file statistc.cpp.
{
  if (buckets_ == NULL) {
    return false;
  }
  const inT32 bucket_count = rangemax_ - rangemin_;
  // Clip x into range and convert it to a bucket offset.
  const inT32 pos = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
  const inT32 height = buckets_[pos];
  if (height == 0) {
    return true;  // an empty pile is always reported as a minimum
  }

  // Skip left over any plateau of equal piles; a lower pile beyond it
  // means x is not a minimum.
  inT32 left = pos - 1;
  while (left >= 0 && buckets_[left] == height) {
    --left;
  }
  if (left >= 0 && buckets_[left] < height) {
    return false;
  }

  // Same check to the right.
  inT32 right = pos + 1;
  while (right < bucket_count && buckets_[right] == height) {
    ++right;
  }
  return !(right < bucket_count && buckets_[right] < height);
}
inT32 STATS::max_bucket | ( | ) | const |
Definition at line 224 of file statistc.cpp.
double STATS::mean | ( | ) | const |
Definition at line 137 of file statistc.cpp.
{
  // With no storage or no samples, report the bottom of the range.
  if (buckets_ == NULL || total_count_ <= 0) {
    return static_cast<double>(rangemin_);
  }
  // Accumulate offset * count from the top bucket down to bucket 0.
  inT64 weighted_total = 0;
  int offset = rangemax_ - rangemin_;
  while (--offset >= 0) {
    weighted_total += static_cast<inT64>(offset) * buckets_[offset];
  }
  // The sum is over offsets, so shift the result back by rangemin_.
  return static_cast<double>(weighted_total) / total_count_ + rangemin_;
}
double STATS::median | ( | ) | const |
Definition at line 242 of file statistc.cpp.
{
  if (buckets_ == NULL) {
    return static_cast<double>(rangemin_);
  }
  double result = ile(0.5);
  const int centre_pile = static_cast<int>(floor(result));
  // If the interpolated value falls on an empty pile, use the midpoint of
  // the nearest non-empty piles on either side instead.
  if ((total_count_ > 1) && (pile_count(centre_pile) == 0)) {
    // Nearest non-empty pile at or below.
    inT32 below = centre_pile;
    while (pile_count(below) == 0) {
      below--;
    }
    // Nearest non-empty pile at or above.
    inT32 above = centre_pile;
    while (pile_count(above) == 0) {
      above++;
    }
    result = (below + above) / 2.0;
  }
  return result;
}
inT32 STATS::min_bucket | ( | ) | const |
Definition at line 208 of file statistc.cpp.
inT32 STATS::mode | ( | ) | const |
Definition at line 117 of file statistc.cpp.
{
  if (buckets_ == NULL) {
    return rangemin_;
  }
  // Start with bucket 0 as the candidate, then scan from the top bucket
  // downwards; only a strictly larger count replaces the candidate.
  inT32 best_count = buckets_[0];
  inT32 best_offset = 0;
  int offset = rangemax_ - rangemin_ - 1;
  while (offset > 0) {
    if (buckets_[offset] > best_count) {
      best_count = buckets_[offset];
      best_offset = offset;
    }
    --offset;
  }
  // Convert the winning offset back to a bucket value.
  return best_offset + rangemin_;
}
Definition at line 74 of file statistc.h.
{
  // Map the value to a bucket offset; values at or beyond either end of
  // the range read the corresponding end bucket.
  inT32 offset;
  if (value <= rangemin_) {
    offset = 0;
  } else if (value >= rangemax_ - 1) {
    offset = rangemax_ - rangemin_ - 1;
  } else {
    offset = value - rangemin_;
  }
  return buckets_[offset];
}
void STATS::plot | ( | ScrollView * | window, |
float | xorigin, | ||
float | yorigin, | ||
float | xscale, | ||
float | yscale, | ||
ScrollView::Color | colour | ||
) | const |
Definition at line 497 of file statistc.cpp.
void STATS::plotline | ( | ScrollView * | window, |
float | xorigin, | ||
float | yorigin, | ||
float | xscale, | ||
float | yscale, | ||
ScrollView::Color | colour | ||
) | const |
Definition at line 524 of file statistc.cpp.
void STATS::print | ( | ) | const |
Definition at line 446 of file statistc.cpp.
{
  if (buckets_ == NULL) {
    return;
  }
  // Bucket offsets of the range reported by min_bucket()/max_bucket().
  const inT32 first = min_bucket() - rangemin_;
  const inT32 last = max_bucket() - rangemin_;

  // Print "value:count" for each non-empty bucket, eight per line.
  int printed = 0;
  for (int offset = first; offset <= last; ++offset) {
    if (buckets_[offset] == 0) {
      continue;
    }
    tprintf("%4d:%-3d ", rangemin_ + offset, buckets_[offset]);
    ++printed;
    if (printed % 8 == 0) {
      tprintf ("\n");
    }
  }
  tprintf ("\n");
  print_summary();
}
void STATS::print_summary | ( | ) | const |
Definition at line 472 of file statistc.cpp.
{
  if (buckets_ == NULL) {
    return;
  }
  // Bucket extremes, printed next to the interpolated ile() figures.
  const inT32 lowest = min_bucket();
  const inT32 highest = max_bucket();

  tprintf("Total count=%d\n", total_count_);
  tprintf("Min=%.2f Really=%d\n", ile(0.0), lowest);
  tprintf("Lower quartile=%.2f\n", ile(0.25));
  tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
  tprintf("Upper quartile=%.2f\n", ile(0.75));
  tprintf("Max=%.2f Really=%d\n", ile(1.0), highest);
  tprintf("Range=%d\n", highest + 1 - lowest);
  tprintf("Mean= %.2f\n", mean());
  tprintf("SD= %.2f\n", sd());
}
double STATS::sd | ( | ) | const |
Definition at line 153 of file statistc.cpp.
{
  // No storage or no samples: report zero spread.
  if (buckets_ == NULL || total_count_ <= 0) {
    return 0.0;
  }
  // Accumulate sum(offset * count) and sum(offset^2 * count), scanning
  // from the top bucket down to bucket 0.
  inT64 linear_total = 0;
  double square_total = 0.0;
  int offset = rangemax_ - rangemin_;
  while (--offset >= 0) {
    linear_total += static_cast<inT64>(offset) * buckets_[offset];
    square_total += static_cast<double>(offset) * offset * buckets_[offset];
  }
  // variance = E[x^2] - (E[x])^2, computed on bucket offsets.
  const double mean_offset = static_cast<double>(linear_total) / total_count_;
  const double variance =
      square_total / total_count_ - mean_offset * mean_offset;
  // Anything that is not strictly positive is reported as zero.
  return variance > 0.0 ? sqrt(variance) : 0.0;
}
Definition at line 61 of file statistc.cpp.
{
  // Reject an empty or inverted range and leave the object untouched.
  if (max_bucket_value_plus_1 <= min_bucket_value) {
    return false;
  }
  // Reallocate only when the number of buckets actually changes.
  const inT32 wanted = max_bucket_value_plus_1 - min_bucket_value;
  const inT32 current = rangemax_ - rangemin_;
  if (current != wanted) {
    delete [] buckets_;
    buckets_ = new inT32[wanted];
  }
  rangemin_ = min_bucket_value;
  rangemax_ = max_bucket_value_plus_1;
  // Zero the buckets and the running total for the new range.
  clear();
  return true;
}
void STATS::smooth | ( | inT32 | factor | ) |
Definition at line 292 of file statistc.cpp.
{
  // Smoothing needs storage and a factor of at least 2.
  if (buckets_ == NULL || factor < 2) {
    return;
  }
  // Build the smoothed histogram in a scratch object over the same range.
  STATS smoothed(rangemin_, rangemax_);
  const int bucket_count = rangemax_ - rangemin_;
  for (int centre = 0; centre < bucket_count; ++centre) {
    // The centre bucket gets weight `factor`; neighbours at distance d get
    // weight `factor - d`. Neighbours outside the range are skipped.
    int weighted = buckets_[centre] * factor;
    for (int step = 1; step < factor; ++step) {
      const int left = centre - step;
      const int right = centre + step;
      if (left >= 0) {
        weighted += buckets_[left] * (factor - step);
      }
      if (right < bucket_count) {
        weighted += buckets_[right] * (factor - step);
      }
    }
    smoothed.add(centre + rangemin_, weighted);
  }
  // Copy the smoothed total and buckets back into this object.
  total_count_ = smoothed.total_count_;
  memcpy(buckets_, smoothed.buckets_, bucket_count * sizeof(buckets_[0]));
}