Tesseract
3.02
|
00001 /********************************************************************** 00002 * File: strngs.c (Formerly strings.c) 00003 * Description: STRING class functions. 00004 * Author: Ray Smith 00005 * Created: Fri Feb 15 09:13:30 GMT 1991 00006 * 00007 * (C) Copyright 1991, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "mfcpch.h" // Precompiled headers 00021 #include "helpers.h" 00022 #include "tprintf.h" 00023 #include "strngs.h" 00024 #include "genericvector.h" 00025 00026 #include <assert.h> 00027 // Size of buffer needed to host the decimal representation of the maximum 00028 // possible length of an int (in 64 bits, being -<20 digits>. 00029 const int kMaxIntSize = 22; 00030 00031 /********************************************************************** 00032 * STRING_HEADER provides metadata about the allocated buffer, 00033 * including total capacity and how much used (strlen with '\0'). 00034 * 00035 * The implementation hides this header at the start of the data 00036 * buffer and appends the string on the end to keep sizeof(STRING) 00037 * unchanged from earlier versions so serialization is not affected. 00038 * 00039 * The collection of MACROS provide different implementations depending 00040 * on whether the string keeps track of its strlen or not so that this 00041 * feature can be added in later when consumers dont modifify the string 00042 **********************************************************************/ 00043 00044 // Smallest string to allocate by default 00045 const int kMinCapacity = 16; 00046 00047 char* STRING::AllocData(int used, int capacity) { 00048 data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER)); 00049 00050 // header is the metadata for this memory block 00051 STRING_HEADER* header = GetHeader(); 00052 header->capacity_ = capacity; 00053 header->used_ = used; 00054 return GetCStr(); 00055 } 00056 00057 void STRING::DiscardData() { 00058 free_string((char *)data_); 00059 } 00060 00061 // This is a private method; ensure FixHeader is called (or used_ is well defined) 00062 // beforehand 00063 char* STRING::ensure_cstr(inT32 min_capacity) { 00064 STRING_HEADER* orig_header = GetHeader(); 00065 if (min_capacity <= orig_header->capacity_) 00066 return ((char *)this->data_) + sizeof(STRING_HEADER); 00067 00068 // if we are going to grow bigger, than double our existing 00069 // size, but if that still is not big enough then keep the 00070 // requested capacity 00071 if (min_capacity < 2 * orig_header->capacity_) 00072 min_capacity = 2 * orig_header->capacity_; 00073 00074 int alloc = sizeof(STRING_HEADER) + min_capacity; 00075 STRING_HEADER* new_header = (STRING_HEADER*)(alloc_string(alloc)); 00076 00077 memcpy(&new_header[1], GetCStr(), orig_header->used_); 00078 new_header->capacity_ = min_capacity; 00079 new_header->used_ = orig_header->used_; 00080 00081 // free old memory, then rebind to new memory 00082 DiscardData(); 00083 data_ = new_header; 00084 00085 assert(InvariantOk()); 00086 return ((char *)data_) + sizeof(STRING_HEADER); 00087 } 00088 00089 // This is const, but is modifying a mutable field 00090 // this way it can be used on const or non-const instances. 00091 void STRING::FixHeader() const { 00092 const STRING_HEADER* header = GetHeader(); 00093 if (header->used_ < 0) 00094 header->used_ = strlen(GetCStr()) + 1; 00095 } 00096 00097 00098 STRING::STRING() { 00099 // 0 indicates old NULL -- it doesnt even have '\0' 00100 AllocData(0, kMinCapacity); 00101 } 00102 00103 STRING::STRING(const STRING& str) { 00104 str.FixHeader(); 00105 const STRING_HEADER* str_header = str.GetHeader(); 00106 int str_used = str_header->used_; 00107 char *this_cstr = AllocData(str_used, str_used); 00108 memcpy(this_cstr, str.GetCStr(), str_used); 00109 assert(InvariantOk()); 00110 } 00111 00112 STRING::STRING(const char* cstr) { 00113 if (cstr == NULL) { 00114 AllocData(0, 0); 00115 } else { 00116 int len = strlen(cstr) + 1; 00117 char* this_cstr = AllocData(len, len); 00118 memcpy(this_cstr, cstr, len); 00119 } 00120 assert(InvariantOk()); 00121 } 00122 00123 STRING::~STRING() { 00124 DiscardData(); 00125 } 00126 00127 // Writes to the given file. Returns false in case of error. 00128 bool STRING::Serialize(FILE* fp) const { 00129 inT32 len = length(); 00130 if (fwrite(&len, sizeof(len), 1, fp) != 1) return false; 00131 if (fwrite(GetCStr(), 1, len, fp) != len) return false; 00132 return true; 00133 } 00134 // Reads from the given file. Returns false in case of error. 00135 // If swap is true, assumes a big/little-endian swap is needed. 00136 bool STRING::DeSerialize(bool swap, FILE* fp) { 00137 inT32 len; 00138 if (fread(&len, sizeof(len), 1, fp) != 1) return false; 00139 if (swap) 00140 ReverseN(&len, sizeof(len)); 00141 truncate_at(len); 00142 if (fread(GetCStr(), 1, len, fp) != len) return false; 00143 return true; 00144 } 00145 00146 BOOL8 STRING::contains(const char c) const { 00147 return (c != '\0') && (strchr (GetCStr(), c) != NULL); 00148 } 00149 00150 inT32 STRING::length() const { 00151 FixHeader(); 00152 return GetHeader()->used_ - 1; 00153 } 00154 00155 const char* STRING::string() const { 00156 const STRING_HEADER* header = GetHeader(); 00157 if (header->used_ == 0) 00158 return NULL; 00159 00160 // mark header length unreliable because tesseract might 00161 // cast away the const and mutate the string directly. 00162 header->used_ = -1; 00163 return GetCStr(); 00164 } 00165 00166 /****** 00167 * The STRING_IS_PROTECTED interface adds additional support to migrate 00168 * code that needs to modify the STRING in ways not otherwise supported 00169 * without violating encapsulation. 00170 * 00171 * Also makes the [] operator return a const so it is immutable 00172 */ 00173 #if STRING_IS_PROTECTED 00174 const char& STRING::operator[](inT32 index) const { 00175 return GetCStr()[index]; 00176 } 00177 00178 void STRING::insert_range(inT32 index, const char* str, int len) { 00179 // if index is outside current range, then also grow size of string 00180 // to accmodate the requested range. 00181 STRING_HEADER* this_header = GetHeader(); 00182 int used = this_header->used_; 00183 if (index > used) 00184 used = index; 00185 00186 char* this_cstr = ensure_cstr(used + len + 1); 00187 if (index < used) { 00188 // move existing string from index to '\0' inclusive. 00189 memmove(this_cstr + index + len, 00190 this_cstr + index, 00191 this_header->used_ - index); 00192 } else if (len > 0) { 00193 // We are going to overwrite previous null terminator, so write the new one. 00194 this_cstr[this_header->used_ + len - 1] = '\0'; 00195 00196 // If the old header did not have the terminator, 00197 // then we need to account for it now that we've added it. 00198 // Otherwise it was already accounted for; we just moved it. 00199 if (this_header->used_ == 0) 00200 ++this_header->used_; 00201 } 00202 00203 // Write new string to index. 00204 // The string is already terminated from the conditions above. 00205 memcpy(this_cstr + index, str, len); 00206 this_header->used_ += len; 00207 00208 assert(InvariantOk()); 00209 } 00210 00211 void STRING::erase_range(inT32 index, int len) { 00212 char* this_cstr = GetCStr(); 00213 STRING_HEADER* this_header = GetHeader(); 00214 00215 memcpy(this_cstr+index, this_cstr+index+len, 00216 this_header->used_ - index - len); 00217 this_header->used_ -= len; 00218 assert(InvariantOk()); 00219 } 00220 00221 #else 00222 void STRING::truncate_at(inT32 index) { 00223 char* this_cstr = ensure_cstr(index + 1); 00224 this_cstr[index] = '\0'; 00225 GetHeader()->used_ = index + 1; 00226 assert(InvariantOk()); 00227 } 00228 00229 char& STRING::operator[](inT32 index) const { 00230 // Code is casting away this const and mutating the string, 00231 // so mark used_ as -1 to flag it unreliable. 00232 GetHeader()->used_ = -1; 00233 return ((char *)GetCStr())[index]; 00234 } 00235 #endif 00236 00237 void STRING::split(const char c, GenericVector<STRING> *splited) { 00238 int start_index = 0; 00239 for (int i = 0; i < length(); i++) { 00240 if ((*this)[i] == c) { 00241 if (i != start_index) { 00242 (*this)[i] = '\0'; 00243 STRING tmp = GetCStr() + start_index; 00244 splited->push_back(tmp); 00245 (*this)[i] = c; 00246 } 00247 start_index = i + 1; 00248 } 00249 } 00250 00251 if (length() != start_index) { 00252 STRING tmp = GetCStr() + start_index; 00253 splited->push_back(tmp); 00254 } 00255 } 00256 00257 BOOL8 STRING::operator==(const STRING& str) const { 00258 FixHeader(); 00259 str.FixHeader(); 00260 const STRING_HEADER* str_header = str.GetHeader(); 00261 const STRING_HEADER* this_header = GetHeader(); 00262 int this_used = this_header->used_; 00263 int str_used = str_header->used_; 00264 00265 return (this_used == str_used) 00266 && (memcmp(GetCStr(), str.GetCStr(), this_used) == 0); 00267 } 00268 00269 BOOL8 STRING::operator!=(const STRING& str) const { 00270 FixHeader(); 00271 str.FixHeader(); 00272 const STRING_HEADER* str_header = str.GetHeader(); 00273 const STRING_HEADER* this_header = GetHeader(); 00274 int this_used = this_header->used_; 00275 int str_used = str_header->used_; 00276 00277 return (this_used != str_used) 00278 || (memcmp(GetCStr(), str.GetCStr(), this_used) != 0); 00279 } 00280 00281 BOOL8 STRING::operator!=(const char* cstr) const { 00282 FixHeader(); 00283 const STRING_HEADER* this_header = GetHeader(); 00284 00285 if (cstr == NULL) 00286 return this_header->used_ > 1; // either '\0' or NULL 00287 else { 00288 inT32 length = strlen(cstr) + 1; 00289 return (this_header->used_ != length) 00290 || (memcmp(GetCStr(), cstr, length) != 0); 00291 } 00292 } 00293 00294 STRING& STRING::operator=(const STRING& str) { 00295 str.FixHeader(); 00296 const STRING_HEADER* str_header = str.GetHeader(); 00297 int str_used = str_header->used_; 00298 00299 GetHeader()->used_ = 0; // clear since ensure doesnt need to copy data 00300 char* this_cstr = ensure_cstr(str_used); 00301 STRING_HEADER* this_header = GetHeader(); 00302 00303 memcpy(this_cstr, str.GetCStr(), str_used); 00304 this_header->used_ = str_used; 00305 00306 assert(InvariantOk()); 00307 return *this; 00308 } 00309 00310 STRING & STRING::operator+=(const STRING& str) { 00311 FixHeader(); 00312 str.FixHeader(); 00313 const STRING_HEADER* str_header = str.GetHeader(); 00314 const char* str_cstr = str.GetCStr(); 00315 int str_used = str_header->used_; 00316 int this_used = GetHeader()->used_; 00317 char* this_cstr = ensure_cstr(this_used + str_used); 00318 00319 STRING_HEADER* this_header = GetHeader(); // after ensure for realloc 00320 00321 if (this_used > 1) { 00322 memcpy(this_cstr + this_used - 1, str_cstr, str_used); 00323 this_header->used_ += str_used - 1; // overwrite '\0' 00324 } else { 00325 memcpy(this_cstr, str_cstr, str_used); 00326 this_header->used_ = str_used; 00327 } 00328 00329 assert(InvariantOk()); 00330 return *this; 00331 } 00332 00333 void STRING::add_str_int(const char* str, int number) { 00334 if (str != NULL) 00335 *this += str; 00336 // Allow space for the maximum possible length of inT64. 00337 char num_buffer[kMaxIntSize]; 00338 snprintf(num_buffer, kMaxIntSize - 1, "%d", number); 00339 num_buffer[kMaxIntSize - 1] = '\0'; 00340 *this += num_buffer; 00341 } 00342 00343 STRING & STRING::operator=(const char* cstr) { 00344 STRING_HEADER* this_header = GetHeader(); 00345 if (cstr) { 00346 int len = strlen(cstr) + 1; 00347 00348 this_header->used_ = 0; // dont bother copying data if need to realloc 00349 char* this_cstr = ensure_cstr(len); 00350 this_header = GetHeader(); // for realloc 00351 memcpy(this_cstr, cstr, len); 00352 this_header->used_ = len; 00353 } 00354 else { 00355 // Reallocate to zero capacity buffer, consistent with the corresponding 00356 // copy constructor. 00357 DiscardData(); 00358 AllocData(0, 0); 00359 } 00360 00361 assert(InvariantOk()); 00362 return *this; 00363 } 00364 00365 00366 STRING STRING::operator+(const STRING& str) const { 00367 STRING result(*this); 00368 result += str; 00369 00370 assert(InvariantOk()); 00371 return result; 00372 } 00373 00374 00375 STRING STRING::operator+(const char ch) const { 00376 STRING result; 00377 FixHeader(); 00378 const STRING_HEADER* this_header = GetHeader(); 00379 int this_used = this_header->used_; 00380 char* result_cstr = result.ensure_cstr(this_used + 1); 00381 STRING_HEADER* result_header = result.GetHeader(); 00382 int result_used = result_header->used_; 00383 00384 // copies '\0' but we'll overwrite that 00385 memcpy(result_cstr, GetCStr(), this_used); 00386 result_cstr[result_used] = ch; // overwrite old '\0' 00387 result_cstr[result_used + 1] = '\0'; // append on '\0' 00388 ++result_header->used_; 00389 00390 assert(InvariantOk()); 00391 return result; 00392 } 00393 00394 00395 STRING& STRING::operator+=(const char *str) { 00396 if (!str || !*str) // empty string has no effect 00397 return *this; 00398 00399 FixHeader(); 00400 int len = strlen(str) + 1; 00401 int this_used = GetHeader()->used_; 00402 char* this_cstr = ensure_cstr(this_used + len); 00403 STRING_HEADER* this_header = GetHeader(); // after ensure for realloc 00404 00405 // if we had non-empty string then append overwriting old '\0' 00406 // otherwise replace 00407 if (this_used > 0) { 00408 memcpy(this_cstr + this_used - 1, str, len); 00409 this_header->used_ += len - 1; 00410 } else { 00411 memcpy(this_cstr, str, len); 00412 this_header->used_ = len; 00413 } 00414 00415 assert(InvariantOk()); 00416 return *this; 00417 } 00418 00419 00420 STRING& STRING::operator+=(const char ch) { 00421 if (ch == '\0') 00422 return *this; 00423 00424 FixHeader(); 00425 int this_used = GetHeader()->used_; 00426 char* this_cstr = ensure_cstr(this_used + 1); 00427 STRING_HEADER* this_header = GetHeader(); 00428 00429 if (this_used > 0) 00430 --this_used; // undo old empty null if there was one 00431 00432 this_cstr[this_used++] = ch; // append ch to end 00433 this_cstr[this_used++] = '\0'; // append '\0' after ch 00434 this_header->used_ = this_used; 00435 00436 assert(InvariantOk()); 00437 return *this; 00438 }