TrinityCore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
utf8::internal Namespace Reference

Enumerations

enum  utf_error {
  UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE,
  OVERLONG_SEQUENCE, INVALID_CODE_POINT
}
 

Functions

template<typename octet_type >
uint8_t mask8 (octet_type oc)
 
template<typename u16_type >
uint16_t mask16 (u16_type oc)
 
template<typename octet_type >
bool is_trail (octet_type oc)
 
template<typename u16 >
bool is_lead_surrogate (u16 cp)
 
template<typename u16 >
bool is_trail_surrogate (u16 cp)
 
template<typename u16 >
bool is_surrogate (u16 cp)
 
template<typename u32 >
bool is_code_point_valid (u32 cp)
 
template<typename octet_iterator >
std::iterator_traits
< octet_iterator >
::difference_type 
sequence_length (octet_iterator lead_it)
 
template<typename octet_difference_type >
bool is_overlong_sequence (uint32_t cp, octet_difference_type length)
 
template<typename octet_iterator >
utf_error increase_safely (octet_iterator &it, octet_iterator end)
 Helper for get_sequence_x. More...
 
template<typename octet_iterator >
utf_error get_sequence_1 (octet_iterator &it, octet_iterator end, uint32_t &code_point)
 get_sequence_x functions decode utf-8 sequences of the length x More...
 
template<typename octet_iterator >
utf_error get_sequence_2 (octet_iterator &it, octet_iterator end, uint32_t &code_point)
 
template<typename octet_iterator >
utf_error get_sequence_3 (octet_iterator &it, octet_iterator end, uint32_t &code_point)
 
template<typename octet_iterator >
utf_error get_sequence_4 (octet_iterator &it, octet_iterator end, uint32_t &code_point)
 
template<typename octet_iterator >
utf_error validate_next (octet_iterator &it, octet_iterator end, uint32_t &code_point)
 
template<typename octet_iterator >
utf_error validate_next (octet_iterator &it, octet_iterator end)
 

Variables

const uint16_t LEAD_SURROGATE_MIN = 0xd800u
 
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu
 
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u
 
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu
 
const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10)
 
const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
 
const uint32_t CODE_POINT_MAX = 0x0010ffffu
 

Enumeration Type Documentation

Enumerator
UTF8_OK 
NOT_ENOUGH_ROOM 
INVALID_LEAD 
INCOMPLETE_SEQUENCE 
OVERLONG_SEQUENCE 
INVALID_CODE_POINT 

Function Documentation

template<typename octet_iterator >
utf_error utf8::internal::get_sequence_1 ( octet_iterator &  it,
octet_iterator  end,
uint32_t &  code_point 
)

get_sequence_x functions decode utf-8 sequences of the length x

154  {
155  if (it == end)
156  return NOT_ENOUGH_ROOM;
157 
158  code_point = utf8::internal::mask8(*it);
159 
160  return UTF8_OK;
161  }
uint8_t mask8(octet_type oc)
Definition: core.h:59
Definition: core.h:134
Definition: core.h:134

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
utf_error utf8::internal::get_sequence_2 ( octet_iterator &  it,
octet_iterator  end,
uint32_t &  code_point 
)
165  {
166  if (it == end)
167  return NOT_ENOUGH_ROOM;
168 
169  code_point = utf8::internal::mask8(*it);
170 
171  UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
172 
173  code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
174 
175  return UTF8_OK;
176  }
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END)
Definition: core.h:149
uint8_t mask8(octet_type oc)
Definition: core.h:59
Definition: core.h:134
Definition: core.h:134

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
utf_error utf8::internal::get_sequence_3 ( octet_iterator &  it,
octet_iterator  end,
uint32_t &  code_point 
)
180  {
181  if (it == end)
182  return NOT_ENOUGH_ROOM;
183 
184  code_point = utf8::internal::mask8(*it);
185 
186  UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
187 
188  code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
189 
190  UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
191 
192  code_point += (*it) & 0x3f;
193 
194  return UTF8_OK;
195  }
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END)
Definition: core.h:149
uint8_t mask8(octet_type oc)
Definition: core.h:59
Definition: checked.h:34
Definition: document.h:390
Definition: core.h:134
Definition: core.h:134

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
utf_error utf8::internal::get_sequence_4 ( octet_iterator &  it,
octet_iterator  end,
uint32_t &  code_point 
)
199  {
200  if (it == end)
201  return NOT_ENOUGH_ROOM;
202 
203  code_point = utf8::internal::mask8(*it);
204 
205  UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
206 
207  code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
208 
209  UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
210 
211  code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
212 
213  UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
214 
215  code_point += (*it) & 0x3f;
216 
217  return UTF8_OK;
218  }
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END)
Definition: core.h:149
uint8_t mask8(octet_type oc)
Definition: core.h:59
Definition: checked.h:34
Definition: document.h:390
Definition: core.h:134
Definition: core.h:134

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
utf_error utf8::internal::increase_safely ( octet_iterator &  it,
octet_iterator  end 
)

Helper for get_sequence_x.

139  {
140  if (++it == end)
141  return NOT_ENOUGH_ROOM;
142 
143  if (!utf8::internal::is_trail(*it))
144  return INCOMPLETE_SEQUENCE;
145 
146  return UTF8_OK;
147  }
Definition: core.h:134
Definition: core.h:134
bool is_trail(octet_type oc)
Definition: core.h:69

+ Here is the call graph for this function:

template<typename u32 >
bool utf8::internal::is_code_point_valid ( u32  cp)
inline
94  {
95  return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
96  }
const uint32_t CODE_POINT_MAX
Definition: core.h:56
bool is_surrogate(u16 cp)
Definition: core.h:87

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename u16 >
bool utf8::internal::is_lead_surrogate ( u16  cp)
inline
76  {
77  return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
78  }
const uint16_t LEAD_SURROGATE_MAX
Definition: core.h:49
const uint16_t LEAD_SURROGATE_MIN
Definition: core.h:48

+ Here is the caller graph for this function:

template<typename octet_difference_type >
bool utf8::internal::is_overlong_sequence ( uint32_t  cp,
octet_difference_type  length 
)
inline
117  {
118  if (cp < 0x80) {
119  if (length != 1)
120  return true;
121  }
122  else if (cp < 0x800) {
123  if (length != 2)
124  return true;
125  }
126  else if (cp < 0x10000) {
127  if (length != 3)
128  return true;
129  }
130 
131  return false;
132  }
float length(float v)
Definition: vectorMath.h:208

+ Here is the caller graph for this function:

template<typename u16 >
bool utf8::internal::is_surrogate ( u16  cp)
inline
88  {
89  return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
90  }
const uint16_t TRAIL_SURROGATE_MAX
Definition: core.h:51
const uint16_t LEAD_SURROGATE_MIN
Definition: core.h:48

+ Here is the caller graph for this function:

template<typename octet_type >
bool utf8::internal::is_trail ( octet_type  oc)
inline
70  {
71  return ((utf8::internal::mask8(oc) >> 6) == 0x2);
72  }
uint8_t mask8(octet_type oc)
Definition: core.h:59

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename u16 >
bool utf8::internal::is_trail_surrogate ( u16  cp)
inline
82  {
83  return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
84  }
const uint16_t TRAIL_SURROGATE_MAX
Definition: core.h:51
const uint16_t TRAIL_SURROGATE_MIN
Definition: core.h:50

+ Here is the caller graph for this function:

template<typename u16_type >
uint16_t utf8::internal::mask16 ( u16_type  oc)
inline
65  {
66  return static_cast<uint16_t>(0xffff & oc);
67  }
unsigned short uint16_t
Definition: stdint.h:79

+ Here is the caller graph for this function:

template<typename octet_type >
uint8_t utf8::internal::mask8 ( octet_type  oc)
inline
60  {
61  return static_cast<uint8_t>(0xff & oc);
62  }
unsigned char uint8_t
Definition: stdint.h:78

+ Here is the caller graph for this function:

template<typename octet_iterator >
std::iterator_traits<octet_iterator>::difference_type utf8::internal::sequence_length ( octet_iterator  lead_it)
inline
101  {
102  uint8_t lead = utf8::internal::mask8(*lead_it);
103  if (lead < 0x80)
104  return 1;
105  else if ((lead >> 5) == 0x6)
106  return 2;
107  else if ((lead >> 4) == 0xe)
108  return 3;
109  else if ((lead >> 3) == 0x1e)
110  return 4;
111  else
112  return 0;
113  }
uint8_t mask8(octet_type oc)
Definition: core.h:59
unsigned char uint8_t
Definition: stdint.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
utf_error utf8::internal::validate_next ( octet_iterator &  it,
octet_iterator  end,
uint32_t &  code_point 
)
224  {
225  // Save the original value of it so we can go back in case of failure
226  // Of course, it does not make much sense with i.e. stream iterators
227  octet_iterator original_it = it;
228 
229  uint32_t cp = 0;
230  // Determine the sequence length based on the lead octet
231  typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
232  const octet_difference_type length = utf8::internal::sequence_length(it);
233 
234  // Get trail octets and calculate the code point
235  utf_error err = UTF8_OK;
236  switch (length) {
237  case 0:
238  return INVALID_LEAD;
239  case 1:
240  err = utf8::internal::get_sequence_1(it, end, cp);
241  break;
242  case 2:
243  err = utf8::internal::get_sequence_2(it, end, cp);
244  break;
245  case 3:
246  err = utf8::internal::get_sequence_3(it, end, cp);
247  break;
248  case 4:
249  err = utf8::internal::get_sequence_4(it, end, cp);
250  break;
251  }
252 
253  if (err == UTF8_OK) {
254  // Decoding succeeded. Now, security checks...
255  if (utf8::internal::is_code_point_valid(cp)) {
256  if (!utf8::internal::is_overlong_sequence(cp, length)){
257  // Passed! Return here.
258  code_point = cp;
259  ++it;
260  return UTF8_OK;
261  }
262  else
263  err = OVERLONG_SEQUENCE;
264  }
265  else
266  err = INVALID_CODE_POINT;
267  }
268 
269  // Failure branch - restore the original value of the iterator
270  it = original_it;
271  return err;
272  }
utf_error
Definition: core.h:134
Definition: core.h:134
Definition: core.h:134
std::iterator_traits< octet_iterator >::difference_type sequence_length(octet_iterator lead_it)
Definition: core.h:100
utf_error get_sequence_3(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:179
unsigned int uint32_t
Definition: stdint.h:80
utf_error get_sequence_1(octet_iterator &it, octet_iterator end, uint32_t &code_point)
get_sequence_x functions decode utf-8 sequences of the length x
Definition: core.h:153
bool is_code_point_valid(u32 cp)
Definition: core.h:93
utf_error get_sequence_4(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:198
float length(float v)
Definition: vectorMath.h:208
bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
Definition: core.h:116
Definition: core.h:134
utf_error get_sequence_2(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:164

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
utf_error utf8::internal::validate_next ( octet_iterator &  it,
octet_iterator  end 
)
inline
275  {
276  uint32_t ignored;
277  return utf8::internal::validate_next(it, end, ignored);
278  }
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:223
unsigned int uint32_t
Definition: stdint.h:80

+ Here is the call graph for this function:

Variable Documentation

const uint32_t utf8::internal::CODE_POINT_MAX = 0x0010ffffu
const uint16_t utf8::internal::LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10)
const uint16_t utf8::internal::LEAD_SURROGATE_MAX = 0xdbffu
const uint16_t utf8::internal::LEAD_SURROGATE_MIN = 0xd800u
const uint32_t utf8::internal::SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
const uint16_t utf8::internal::TRAIL_SURROGATE_MAX = 0xdfffu
const uint16_t utf8::internal::TRAIL_SURROGATE_MIN = 0xdc00u