OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
UTF8Handler.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <string>
8 // OpenTrep
10 
11 namespace OPENTREP {
12 
// Integer type used for the entries of the offset table below.
// NOTE(review): despite the name, `unsigned long` is wider than 32 bits
// on LP64 platforms; every table value fits in 32 bits regardless.
typedef unsigned long u_int32_t;

// //////////////////////////////////////////////////////////////////////
// Correction subtracted from the accumulated bytes of one UTF-8 sequence.
// The table is indexed by the number of trailing bytes of the sequence
// (see how toWideString() uses it).
static const u_int32_t offsetsFromUTF8[6] = {
  0x00000000UL,  // no trailing byte
  0x00003080UL,  // 1 trailing byte
  0x000E2080UL,  // 2 trailing bytes
  0x03C82080UL,  // 3 trailing bytes
  0xFA082080UL,  // 4 trailing bytes
  0x82082080UL   // 5 trailing bytes
};
23 
24  // //////////////////////////////////////////////////////////////////////
// Number of bytes that follow a given first byte within a UTF-8 sequence.
// The table is indexed by the value (0x00-0xFF) of that first byte.
static const char trailingBytesForUTF8[256] = {
  // 0x00 - 0xBF: no trailing byte
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  // 0xC0 - 0xDF: one trailing byte
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0xE0 - 0xEF: two trailing bytes
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  // 0xF0 - 0xF7: three; 0xF8 - 0xFB: four; 0xFC - 0xFF: five
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
35 
36  // //////////////////////////////////////////////////////////////////////
37  std::wstring UTF8Handler::toWideString (const std::string& iSrc) {
38  std::basic_ostringstream<wchar_t> oStr;
39 
40  // Length of the source string
41  const size_t lStringSize = iSrc.size();
42 
43  // Transform the source string in a regular C-string (char*)
44  const char* src = iSrc.c_str();
45 
46  //
47  typedef unsigned char uchar_t;
48 
49  size_t idx = 0;
50  while (idx != lStringSize) {
51 
52  uchar_t lCurrentChar = static_cast<uchar_t> (src[idx]);
53 
62  if (lCurrentChar == '\0') {
63  break;
64  }
65 
66  const int nb = trailingBytesForUTF8[lCurrentChar];
67 
68  wchar_t tmpChar = 0;
69  switch (nb) {
70  // These fall through deliberately
71  case 3: {
72  lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
73  tmpChar += lCurrentChar; tmpChar <<= 6;
74  }
75  case 2: {
76  lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
77  tmpChar += lCurrentChar; tmpChar <<= 6;
78  }
79  case 1: {
80  lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
81  tmpChar += lCurrentChar; tmpChar <<= 6;
82  }
83  case 0: {
84  lCurrentChar = static_cast<uchar_t> (src[idx]); ++idx;
85  tmpChar += lCurrentChar;
86  }
87  }
88 
89  tmpChar -= offsetsFromUTF8[nb];
90  oStr << tmpChar;
91  }
92 
93  oStr << '\0';
94  return oStr.str();
95  }
96 
97  // //////////////////////////////////////////////////////////////////////
98  std::string UTF8Handler::toSimpleString (const std::wstring& iStr) {
99  std::ostringstream oStr;
100 
101  const wchar_t* src = iStr.c_str();
102  size_t idx = 0;
103  size_t i = 0;
104 
105  while (src[i] != 0) {
106  wchar_t ch = src[i];
107 
108  if (ch < 0x80) {
109  const char tmpChar = static_cast<const char> (ch);
110  oStr << tmpChar; ++idx;
111 
112  } else if (ch < 0x800) {
113  char tmpChar = static_cast<const char> ((ch >> 6) | 0xC0);
114  oStr << tmpChar; ++idx;
115 
116  tmpChar = static_cast<const char> ((ch & 0x3F) | 0x80);
117  oStr << tmpChar; ++idx;
118 
119  } else if (ch < 0x10000) {
120  char tmpChar = static_cast<const char> ((ch>>12) | 0xE0);
121  oStr << tmpChar; ++idx;
122 
123  tmpChar = static_cast<const char> (((ch>>6) & 0x3F) | 0x80);
124  oStr << tmpChar; ++idx;
125 
126  tmpChar = static_cast<const char> ((ch & 0x3F) | 0x80);
127  oStr << tmpChar; ++idx;
128 
129  } else if (ch < 0x110000) {
130  char tmpChar = static_cast<const char> ((ch>>18) | 0xF0);
131  oStr << tmpChar; ++idx;
132 
133  tmpChar = static_cast<const char> (((ch>>12) & 0x3F) | 0x80);
134  oStr << tmpChar; ++idx;
135 
136  tmpChar = static_cast<const char> (((ch>>6) & 0x3F) | 0x80);
137  oStr << tmpChar; ++idx;
138 
139  tmpChar = static_cast<const char> ((ch & 0x3F) | 0x80);
140  oStr << tmpChar; ++idx;
141  }
142  i++;
143  }
144 
145  oStr << '\0';
146 
147  return oStr.str();
148  }
149 
150  // //////////////////////////////////////////////////////////////////////
151  std::string UTF8Handler::displayCharString (const char* iString) {
152  std::ostringstream oStr;
153 
154  bool hasReachedEnd = false;
155  for (size_t idx = 0; hasReachedEnd == false; ++idx) {
156  if (idx != 0) {
157  oStr << "; ";
158  }
159  const unsigned char lChar = iString[idx];
160  // const wchar_t lChar = iString[idx];
161  if (lChar == '\0') {
162  hasReachedEnd = true;
163  }
164  oStr << "[" << idx << "]: " << std::hex << lChar;
165  }
166  oStr << std::endl;
167 
168  return oStr.str();
169  }
170 
171  // //////////////////////////////////////////////////////////////////////
172  std::string UTF8Handler::displaySTLWString (const std::wstring& iString) {
173  std::ostringstream oStr;
174 
175  size_t idx = 0;
176  for (std::wstring::const_iterator itChar = iString.begin();
177  itChar != iString.end(); ++itChar, ++idx) {
178  if (idx != 0) {
179  oStr << "; ";
180  }
181  const wchar_t lChar = *itChar;
182  oStr << "[" << idx << "]: " << std::hex << lChar;
183  }
184  oStr << std::endl;
185 
186  return oStr.str();
187  }
188 
189 }
190 
static std::wstring toWideString(const std::string &iSrc)
Definition: UTF8Handler.cpp:37
long unsigned int u_int32_t
Definition: UTF8Handler.cpp:16
static std::string displayCharString(const char *iString)
static std::string toSimpleString(const std::wstring &iStr)
Definition: UTF8Handler.cpp:98
static std::string displaySTLWString(const std::wstring &iString)
static const u_int32_t offsetsFromUTF8[6]
Definition: UTF8Handler.cpp:19
static const char trailingBytesForUTF8[256]
Definition: UTF8Handler.cpp:25