Tesseract 3.01
/data/source/tesseract-ocr/ccutil/strngs.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        strngs.h  (Formerly strings.h)
00003  * Description: STRING class definition.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Fri Feb 15 09:15:01 GMT 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef           STRNGS_H
00021 #define           STRNGS_H
00022 
00023 #include          <string.h>
00024 #include          "memry.h"
00025 #include          "serialis.h"
00026 
00027 // STRING_IS_PROTECTED means that string[index] = X is invalid
00028 // because you have to go through the STRING interface to modify it.
00029 // This allows the string to ensure internal integrity and maintain
00030 // its own string length. Unfortunately this is not possible because
00031 // STRINGs are used as direct-manipulation data buffers for things
00032 // like length arrays and many places cast away the const on string()
00033 // to mutate the string. Turning this off means that internally we
00034 // cannot assume we know the strlen.
00035 #define STRING_IS_PROTECTED  0
00036 
00037 #ifdef CCUTIL_EXPORTS  // CCUTIL_EXPORTS defined: CCUTIL_API marks symbols as dllexport
00038 #define CCUTIL_API __declspec(dllexport)
00039 #elif defined(CCUTIL_IMPORTS)  // CCUTIL_IMPORTS defined: CCUTIL_API marks symbols as dllimport
00040 #define CCUTIL_API __declspec(dllimport)
00041 #else  // neither macro defined: CCUTIL_API expands to nothing
00042 #define CCUTIL_API
00043 #endif
00044 
00045 class CCUTIL_API STRING  // String class: one data_ pointer to a STRING_HEADER followed by the chars
00046 {
00047   public:
00048     STRING();
00049     STRING(const STRING &string);
00050     STRING(const char *string);
00051     ~STRING ();
00052 
00053     BOOL8 contains(const char c) const;
00054     inT32 length() const;
00055     const char *string() const;
00056 
00057 #if STRING_IS_PROTECTED  // protected mode: read-only indexing plus explicit edit methods
00058     const char &operator[] (inT32 index) const;
00059     // len is the number of chars in s to insert starting at index in this string
00060     void insert_range(inT32 index, const char*s, int len);
00061     void erase_range(inT32 index, int len);
00062     void truncate_at(inT32 index);
00063 #else  // unprotected mode: indexing yields a mutable char& even on a const STRING
00064     char &operator[] (inT32 index) const;
00065 #endif
00066 
00067     BOOL8 operator== (const STRING & string) const;
00068     BOOL8 operator!= (const STRING & string) const;
00069     BOOL8 operator!= (const char *string) const;
00070 
00071     STRING & operator= (const char *string);
00072     STRING & operator= (const STRING & string);
00073 
00074     STRING operator+ (const STRING & string) const;
00075     STRING operator+ (const char ch) const;
00076 
00077     STRING & operator+= (const char *string);
00078     STRING & operator+= (const STRING & string);
00079     STRING & operator+= (const char ch);
00080 
00081     // Appends the given string and int (as a %d) to this.
00082     // += cannot be used for ints as there is a char += operator that would
00083     // be ambiguous, and ints usually need a string before or between them
00084     // anyway.
00085     void add_str_int(const char* str, int number);
00086 
00087     // Ensure capacity but keep the pointer encapsulated: forwards to ensure_cstr() and drops its result.
00088     inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }
00089 
00090   private:
00091     typedef struct STRING_HEADER {
00092       // How much space was allocated in the string buffer for char data.
00093       int capacity_;
00094 
00095       // used_ is how much of the capacity is currently being used,
00096       // including a '\0' terminator.
00097       //
00098       // If used_ is 0 then string is NULL (not even the '\0')
00099       // else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
00100       // else strlen is >= 0 (not NULL) but needs to be computed.
00101       //      this condition is set when encapsulation is violated because
00102       //      an API returned a mutable string.
00103       //
00104       // capacity_ - used_ = excess capacity that the string can grow
00105       //                     without reallocating
00106       mutable int used_;
00107     } STRING_HEADER;
00108 
00109     // To preserve the behavior of the old serialization, we only have space
00110     // for one pointer in this structure. So we are embedding a data structure
00111     // at the start of the storage that will hold additional state variables,
00112     // then storing the actual string contents immediately after.
00113     STRING_HEADER* data_;
00114 
00115     // Returns the header part of the storage (data_ itself).
00116     inline STRING_HEADER* GetHeader() {
00117       return data_;
00118     }
00119     inline const STRING_HEADER* GetHeader() const {
00120       return data_;
00121     }
00122 
00123     // Returns the string data part of storage: the bytes sizeof(STRING_HEADER) past data_.
00124     inline char* GetCStr() {
00125       return ((char *)data_) + sizeof(STRING_HEADER);
00126     };  // NOTE(review): the ';' after the function body is redundant
00127 
00128     inline const char* GetCStr() const {
00129       return ((const char *)data_) + sizeof(STRING_HEADER);
00130     };  // NOTE(review): the ';' after the function body is redundant
00131     inline bool InvariantOk() const {  // always true unless STRING_IS_PROTECTED is nonzero
00132 #if STRING_IS_PROTECTED
00133       return (GetHeader()->used_ == 0) ?
00134         (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1));  // NOTE(review): compares int used_ with a size_t value
00135 #else
00136       return true;
00137 #endif
00138     }
00139 
00140     // Ensure string has requested capacity as optimization
00141     // to avoid unnecessary reallocations.
00142     // The return value is a cstr buffer with at least requested capacity
00143     char* ensure_cstr(inT32 min_capacity);
00144 
00145     void FixHeader() const;  // make used_ non-negative, even if const
00146 
00147     char* AllocData(int used, int capacity);  // defined elsewhere; presumably allocates header+buffer storage -- TODO confirm
00148     void DiscardData();  // defined elsewhere; presumably releases the storage behind data_ -- TODO confirm
00149 };
00150 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines