TrinityCore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
utf8 Namespace Reference

Namespaces

 internal
 
 unchecked
 

Classes

class  exception
 
class  invalid_code_point
 
class  invalid_utf16
 
class  invalid_utf8
 
class  iterator
 
class  not_enough_room
 

Typedefs

typedef unsigned char uint8_t
 
typedef unsigned short uint16_t
 
typedef unsigned int uint32_t
 

Functions

template<typename octet_iterator >
octet_iterator append (uint32_t cp, octet_iterator result)
 The library API - functions intended to be called by the users. More...
 
template<typename octet_iterator , typename output_iterator >
output_iterator replace_invalid (octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
 
template<typename octet_iterator , typename output_iterator >
output_iterator replace_invalid (octet_iterator start, octet_iterator end, output_iterator out)
 
template<typename octet_iterator >
uint32_t next (octet_iterator &it, octet_iterator end)
 
template<typename octet_iterator >
uint32_t peek_next (octet_iterator it, octet_iterator end)
 
template<typename octet_iterator >
uint32_t prior (octet_iterator &it, octet_iterator start)
 
template<typename octet_iterator >
uint32_t previous (octet_iterator &it, octet_iterator pass_start)
 Deprecated in versions that include "prior". More...
 
template<typename octet_iterator , typename distance_type >
void advance (octet_iterator &it, distance_type n, octet_iterator end)
 
template<typename octet_iterator >
std::iterator_traits
< octet_iterator >
::difference_type 
distance (octet_iterator first, octet_iterator last)
 
template<typename u16bit_iterator , typename octet_iterator >
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
 
template<typename u16bit_iterator , typename octet_iterator >
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
 
template<typename octet_iterator , typename u32bit_iterator >
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
 
template<typename octet_iterator , typename u32bit_iterator >
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
 
template<typename octet_iterator >
octet_iterator find_invalid (octet_iterator start, octet_iterator end)
 
template<typename octet_iterator >
bool is_valid (octet_iterator start, octet_iterator end)
 
template<typename octet_iterator >
bool starts_with_bom (octet_iterator it, octet_iterator end)
 
template<typename octet_iterator >
bool is_bom (octet_iterator it)
 

Variables

const uint8_t bom [] = {0xef, 0xbb, 0xbf}
 The UTF-8 byte order mark (BOM) octets 0xEF 0xBB 0xBF; part of the library API intended to be called by the users. More...
 

Typedef Documentation

typedef unsigned short utf8::uint16_t
typedef unsigned int utf8::uint32_t
typedef unsigned char utf8::uint8_t

Function Documentation

template<typename octet_iterator , typename distance_type >
void utf8::advance ( octet_iterator &  it,
distance_type  n,
octet_iterator  end 
)
191  {
192  for (distance_type i = 0; i < n; ++i)
193  utf8::next(it, end);
194  }
uint32_t next(octet_iterator &it, octet_iterator end)
Definition: checked.h:137

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
octet_iterator utf8::append ( uint32_t  cp,
octet_iterator  result 
)

The library API - functions intended to be called by the users.

74  {
75  if (!utf8::internal::is_code_point_valid(cp))
76  throw invalid_code_point(cp);
77 
78  if (cp < 0x80) // one octet
79  *(result++) = static_cast<uint8_t>(cp);
80  else if (cp < 0x800) { // two octets
81  *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
82  *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
83  }
84  else if (cp < 0x10000) { // three octets
85  *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
86  *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
87  *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
88  }
89  else { // four octets
90  *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
91  *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
92  *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
93  *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
94  }
95  return result;
96  }
bool is_code_point_valid(u32 cp)
Definition: core.h:93

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
std::iterator_traits<octet_iterator>::difference_type utf8::distance ( octet_iterator  first,
octet_iterator  last 
)
199  {
200  typename std::iterator_traits<octet_iterator>::difference_type dist;
201  for (dist = 0; first < last; ++dist)
202  utf8::next(first, last);
203  return dist;
204  }
uint32_t next(octet_iterator &it, octet_iterator end)
Definition: checked.h:137

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
octet_iterator utf8::find_invalid ( octet_iterator  start,
octet_iterator  end 
)
289  {
290  octet_iterator result = start;
291  while (result != end) {
292  utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
293  if (err_code != internal::UTF8_OK)
294  return result;
295  }
296  return result;
297  }
utf_error
Definition: core.h:134
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:223
Definition: core.h:134

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
bool utf8::is_bom ( octet_iterator  it)
inline
318  {
319  return (
320  (utf8::internal::mask8(*it++)) == bom[0] &&
321  (utf8::internal::mask8(*it++)) == bom[1] &&
322  (utf8::internal::mask8(*it)) == bom[2]
323  );
324  }
uint8_t mask8(octet_type oc)
Definition: core.h:59
const uint8_t bom[]
The library API - functions intended to be called by the users.
Definition: core.h:285

+ Here is the call graph for this function:

template<typename octet_iterator >
bool utf8::is_valid ( octet_iterator  start,
octet_iterator  end 
)
inline
301  {
302  return (utf8::find_invalid(start, end) == end);
303  }
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
Definition: core.h:288

+ Here is the call graph for this function:

template<typename octet_iterator >
uint32_t utf8::next ( octet_iterator &  it,
octet_iterator  end 
)
138  {
139  uint32_t cp = 0;
140  internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
141  switch (err_code) {
142  case internal::UTF8_OK :
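143  break;
144  case internal::NOT_ENOUGH_ROOM :
145  throw not_enough_room();
146  case internal::INVALID_LEAD :
147  case internal::INCOMPLETE_SEQUENCE :
148  case internal::OVERLONG_SEQUENCE :
149  throw invalid_utf8(*it);
150  case internal::INVALID_CODE_POINT :
151  throw invalid_code_point(cp);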
152  }
153  return cp;
154  }
utf_error
Definition: core.h:134
Definition: core.h:134
Definition: core.h:134
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:223
unsigned int uint32_t
Definition: stdint.h:80
Definition: core.h:134
Definition: core.h:134

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
uint32_t utf8::peek_next ( octet_iterator  it,
octet_iterator  end 
)
158  {
159  return utf8::next(it, end);
160  }
uint32_t next(octet_iterator &it, octet_iterator end)
Definition: checked.h:137

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
uint32_t utf8::previous ( octet_iterator &  it,
octet_iterator  pass_start 
)

Deprecated in versions that include "prior".

180  {
181  octet_iterator end = it;
182  while (utf8::internal::is_trail(*(--it)))
183  if (it == pass_start)
184  throw invalid_utf8(*it); // error - no lead byte in the sequence
185  octet_iterator temp = it;
186  return utf8::next(temp, end);
187  }
uint32_t next(octet_iterator &it, octet_iterator end)
Definition: checked.h:137
bool is_trail(octet_type oc)
Definition: core.h:69

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator >
uint32_t utf8::prior ( octet_iterator &  it,
octet_iterator  start 
)
164  {
165  // can't do much if it == start
166  if (it == start)
167  throw not_enough_room();
168 
169  octet_iterator end = it;
170  // Go back until we hit either a lead octet or start
171  while (utf8::internal::is_trail(*(--it)))
172  if (it == start)
173  throw invalid_utf8(*it); // error - no lead byte in the sequence
174  return utf8::peek_next(it, end);
175  }
uint32_t peek_next(octet_iterator it, octet_iterator end)
Definition: checked.h:157
bool is_trail(octet_type oc)
Definition: core.h:69

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator , typename output_iterator >
output_iterator utf8::replace_invalid ( octet_iterator  start,
octet_iterator  end,
output_iterator  out,
uint32_t  replacement 
)
100  {
101  while (start != end) {
102  octet_iterator sequence_start = start;
103  internal::utf_error err_code = utf8::internal::validate_next(start, end);
104  switch (err_code) {
105  case internal::UTF8_OK :
106  for (octet_iterator it = sequence_start; it != start; ++it)
107  *out++ = *it;
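108  break;
109  case internal::NOT_ENOUGH_ROOM:
110  throw not_enough_room();
111  case internal::INVALID_LEAD:
112  out = utf8::append (replacement, out);
113  ++start;
114  break;
115  case internal::INCOMPLETE_SEQUENCE:
116  case internal::OVERLONG_SEQUENCE:
117  case internal::INVALID_CODE_POINT:
118  out = utf8::append (replacement, out);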
119  ++start;
120  // just one replacement mark for the sequence
121  while (start != end && utf8::internal::is_trail(*start))
122  ++start;
123  break;
124  }
125  }
126  return out;
127  }
utf_error
Definition: core.h:134
Definition: core.h:134
Definition: core.h:134
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:223
Definition: core.h:134
Definition: core.h:134
bool is_trail(octet_type oc)
Definition: core.h:69
octet_iterator append(uint32_t cp, octet_iterator result)
The library API - functions intended to be called by the users.
Definition: checked.h:73

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator , typename output_iterator >
output_iterator utf8::replace_invalid ( octet_iterator  start,
octet_iterator  end,
output_iterator  out 
)
inline
131  {
132  static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
133  return utf8::replace_invalid(start, end, out, replacement_marker);
134  }
unsigned int uint32_t
Definition: stdint.h:80
uint16_t mask16(u16_type oc)
Definition: core.h:64
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
Definition: checked.h:99

+ Here is the call graph for this function:

template<typename octet_iterator >
bool utf8::starts_with_bom ( octet_iterator  it,
octet_iterator  end 
)
inline
307  {
308  return (
309  ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
310  ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
311  ((it != end) && (utf8::internal::mask8(*it)) == bom[2])
312  );
313  }
uint8_t mask8(octet_type oc)
Definition: core.h:59
const uint8_t bom[]
The library API - functions intended to be called by the users.
Definition: core.h:285

+ Here is the call graph for this function:

template<typename u16bit_iterator , typename octet_iterator >
octet_iterator utf8::utf16to8 ( u16bit_iterator  start,
u16bit_iterator  end,
octet_iterator  result 
)
208  {
209  while (start != end) {
210  uint32_t cp = utf8::internal::mask16(*start++);
211  // Take care of surrogate pairs first
212  if (utf8::internal::is_lead_surrogate(cp)) {
213  if (start != end) {
214  uint32_t trail_surrogate = utf8::internal::mask16(*start++);
215  if (utf8::internal::is_trail_surrogate(trail_surrogate))
216  cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
217  else
218  throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
219  }
220  else
221  throw invalid_utf16(static_cast<uint16_t>(cp));
222 
223  }
224  // Lone trail surrogate
225  else if (utf8::internal::is_trail_surrogate(cp))
226  throw invalid_utf16(static_cast<uint16_t>(cp));
227 
228  result = utf8::append(cp, result);
229  }
230  return result;
231  }
bool is_lead_surrogate(u16 cp)
Definition: core.h:75
bool is_trail_surrogate(u16 cp)
Definition: core.h:81
unsigned int uint32_t
Definition: stdint.h:80
const uint32_t SURROGATE_OFFSET
Definition: core.h:53
uint16_t mask16(u16_type oc)
Definition: core.h:64
octet_iterator append(uint32_t cp, octet_iterator result)
The library API - functions intended to be called by the users.
Definition: checked.h:73

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator , typename u32bit_iterator >
octet_iterator utf8::utf32to8 ( u32bit_iterator  start,
u32bit_iterator  end,
octet_iterator  result 
)
250  {
251  while (start != end)
252  result = utf8::append(*(start++), result);
253 
254  return result;
255  }
octet_iterator append(uint32_t cp, octet_iterator result)
The library API - functions intended to be called by the users.
Definition: checked.h:73

+ Here is the call graph for this function:

template<typename u16bit_iterator , typename octet_iterator >
u16bit_iterator utf8::utf8to16 ( octet_iterator  start,
octet_iterator  end,
u16bit_iterator  result 
)
235  {
236  while (start != end) {
237  uint32_t cp = utf8::next(start, end);
238  if (cp > 0xffff) { //make a surrogate pair
239  *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
240  *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
241  }
242  else
243  *result++ = static_cast<uint16_t>(cp);
244  }
245  return result;
246  }
uint32_t next(octet_iterator &it, octet_iterator end)
Definition: checked.h:137
const uint16_t TRAIL_SURROGATE_MIN
Definition: core.h:50
unsigned int uint32_t
Definition: stdint.h:80
const uint16_t LEAD_OFFSET
Definition: core.h:52
unsigned short uint16_t
Definition: stdint.h:79

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename octet_iterator , typename u32bit_iterator >
u32bit_iterator utf8::utf8to32 ( octet_iterator  start,
octet_iterator  end,
u32bit_iterator  result 
)
259  {
260  while (start != end)
261  (*result++) = utf8::next(start, end);
262 
263  return result;
264  }
uint32_t next(octet_iterator &it, octet_iterator end)
Definition: checked.h:137

+ Here is the call graph for this function:

Variable Documentation

const uint8_t utf8::bom[] = {0xef, 0xbb, 0xbf}

The UTF-8 byte order mark (BOM) octets 0xEF 0xBB 0xBF; part of the library API intended to be called by the users.