TrinityCore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
UTF8< CharType > Struct Template Reference

UTF-8 encoding. More...

#include <encodings.h>

Public Types

enum  { supportUnicode = 1 }
 
typedef CharType Ch
 

Static Public Member Functions

template<typename OutputStream >
static void Encode (OutputStream &os, unsigned codepoint)
 
template<typename InputStream >
static bool Decode (InputStream &is, unsigned *codepoint)
 
template<typename InputStream , typename OutputStream >
static bool Validate (InputStream &is, OutputStream &os)
 
static unsigned char GetRange (unsigned char c)
 
template<typename InputByteStream >
static CharType TakeBOM (InputByteStream &is)
 
template<typename InputByteStream >
static Ch Take (InputByteStream &is)
 
template<typename OutputByteStream >
static void PutBOM (OutputByteStream &os)
 
template<typename OutputByteStream >
static void Put (OutputByteStream &os, Ch c)
 

Detailed Description

template<typename CharType = char>
struct UTF8< CharType >

UTF-8 encoding.

See http://en.wikipedia.org/wiki/UTF-8 and http://tools.ietf.org/html/rfc3629

Template Parameters
CharType: Code unit for storing 8-bit UTF-8 data. Default is char.
Note
Implements the Encoding concept.

Member Typedef Documentation

template<typename CharType = char>
typedef CharType UTF8< CharType >::Ch

Member Enumeration Documentation

template<typename CharType = char>
anonymous enum
Enumerator
supportUnicode 
99 { supportUnicode = 1 };
Definition: encodings.h:99

Member Function Documentation

template<typename CharType = char>
template<typename InputStream >
static bool UTF8< CharType >::Decode ( InputStream &  is,
unsigned *  codepoint 
)
inlinestatic
124  {
125 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
126 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
127 #define TAIL() COPY(); TRANS(0x70)
128  Ch c = is.Take();
129  if (!(c & 0x80)) {
130  *codepoint = (unsigned char)c;
131  return true;
132  }
133 
134  unsigned char type = GetRange((unsigned char)c);
135  *codepoint = (0xFF >> type) & (unsigned char)c;
136  bool result = true;
137  switch (type) {
138  case 2: TAIL(); return result;
139  case 3: TAIL(); TAIL(); return result;
140  case 4: COPY(); TRANS(0x50); TAIL(); return result;
141  case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
142  case 6: TAIL(); TAIL(); TAIL(); return result;
143  case 10: COPY(); TRANS(0x20); TAIL(); return result;
144  case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
145  default: return false;
146  }
147 #undef COPY
148 #undef TRANS
149 #undef TAIL
150  }
#define COPY()
#define TAIL()
CharType Ch
Definition: encodings.h:97
#define TRANS(mask)
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:178

+ Here is the call graph for this function:

template<typename CharType = char>
template<typename OutputStream >
static void UTF8< CharType >::Encode ( OutputStream &  os,
unsigned  codepoint 
)
inlinestatic
102  {
103  if (codepoint <= 0x7F)
104  os.Put(static_cast<Ch>(codepoint & 0xFF));
105  else if (codepoint <= 0x7FF) {
106  os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
107  os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
108  }
109  else if (codepoint <= 0xFFFF) {
110  os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
111  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
112  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
113  }
114  else {
115  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
116  os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
117  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
118  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
120  }
121  }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
template<typename CharType = char>
static unsigned char UTF8< CharType >::GetRange ( unsigned char  c)
inlinestatic
178  {
179  // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
180  // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
181  static const unsigned char type[] = {
182  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
183  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
185  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
186  0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
187  0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
188  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
189  0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
190  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
191  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
192  };
193  return type[c];
194  }

+ Here is the caller graph for this function:

template<typename CharType = char>
template<typename OutputByteStream >
static void UTF8< CharType >::Put ( OutputByteStream &  os,
Ch  c 
)
inlinestatic
222  {
223  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
224  os.Put(static_cast<typename OutputByteStream::Ch>(c));
225  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:375
template<typename CharType = char>
template<typename OutputByteStream >
static void UTF8< CharType >::PutBOM ( OutputByteStream &  os)
inlinestatic
216  {
217  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
218  os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
219  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:375
template<typename CharType = char>
template<typename InputByteStream >
static Ch UTF8< CharType >::Take ( InputByteStream &  is)
inlinestatic
210  {
211  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
212  return is.Take();
213  }
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:375

+ Here is the caller graph for this function:

template<typename CharType = char>
template<typename InputByteStream >
static CharType UTF8< CharType >::TakeBOM ( InputByteStream &  is)
inlinestatic
197  {
198  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
199  Ch c = Take(is);
200  if ((unsigned char)c != 0xEFu) return c;
201  c = is.Take();
202  if ((unsigned char)c != 0xBBu) return c;
203  c = is.Take();
204  if ((unsigned char)c != 0xBFu) return c;
205  c = is.Take();
206  return c;
207  }
CharType Ch
Definition: encodings.h:97
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:375
static Ch Take(InputByteStream &is)
Definition: encodings.h:210

+ Here is the call graph for this function:

template<typename CharType = char>
template<typename InputStream , typename OutputStream >
static bool UTF8< CharType >::Validate ( InputStream &  is,
OutputStream &  os 
)
inlinestatic
153  {
154 #define COPY() os.Put(c = is.Take())
155 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
156 #define TAIL() COPY(); TRANS(0x70)
157  Ch c;
158  COPY();
159  if (!(c & 0x80))
160  return true;
161 
162  bool result = true;
163  switch (GetRange((unsigned char)c)) {
164  case 2: TAIL(); return result;
165  case 3: TAIL(); TAIL(); return result;
166  case 4: COPY(); TRANS(0x50); TAIL(); return result;
167  case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
168  case 6: TAIL(); TAIL(); TAIL(); return result;
169  case 10: COPY(); TRANS(0x20); TAIL(); return result;
170  case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
171  default: return false;
172  }
173 #undef COPY
174 #undef TRANS
175 #undef TAIL
176  }
#define COPY()
#define TAIL()
CharType Ch
Definition: encodings.h:97
#define TRANS(mask)
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:178

+ Here is the call graph for this function:


The documentation for this struct was generated from the following file: