28 #ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29 #define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
52 const uint16_t
LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
53 const uint32_t
SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
58 template<
typename octet_type>
59 inline uint8_t
mask8(octet_type oc)
61 return static_cast<uint8_t
>(0xff & oc);
63 template<
typename u16_type>
66 return static_cast<uint16_t
>(0xffff & oc);
68 template<
typename octet_type>
74 template <
typename u16>
77 return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
80 template <
typename u16>
83 return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
86 template <
typename u16>
89 return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
92 template <
typename u32>
98 template <
typename octet_iterator>
99 inline typename std::iterator_traits<octet_iterator>::difference_type
105 else if ((lead >> 5) == 0x6)
107 else if ((lead >> 4) == 0xe)
109 else if ((lead >> 3) == 0x1e)
115 template <
typename octet_difference_type>
122 else if (cp < 0x800) {
126 else if (cp < 0x10000) {
137 template <
typename octet_iterator>
149 #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
152 template <
typename octet_iterator>
163 template <
typename octet_iterator>
173 code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
178 template <
typename octet_iterator>
192 code_point += (*it) & 0x3f;
197 template <
typename octet_iterator>
215 code_point += (*it) & 0x3f;
220 #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
222 template <
typename octet_iterator>
227 octet_iterator original_it = it;
231 typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
274 template <
typename octet_iterator>
285 const uint8_t
bom[] = {0xef, 0xbb, 0xbf};
287 template <
typename octet_iterator>
290 octet_iterator result = start;
291 while (result != end) {
299 template <
typename octet_iterator>
300 inline bool is_valid(octet_iterator start, octet_iterator end)
305 template <
typename octet_iterator>
316 template <
typename octet_iterator>
327 #endif // header guard
const uint16_t LEAD_SURROGATE_MAX
Definition: core.h:49
const uint16_t TRAIL_SURROGATE_MAX
Definition: core.h:51
utf_error
Definition: core.h:134
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:223
std::iterator_traits< octet_iterator >::difference_type sequence_length(octet_iterator lead_it)
Definition: core.h:100
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END)
Definition: core.h:149
const uint16_t TRAIL_SURROGATE_MIN
Definition: core.h:50
const uint32_t CODE_POINT_MAX
Definition: core.h:56
utf_error get_sequence_3(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:179
bool is_lead_surrogate(u16 cp)
Definition: core.h:75
bool is_trail_surrogate(u16 cp)
Definition: core.h:81
uint8_t mask8(octet_type oc)
Definition: core.h:59
unsigned char uint8_t
Definition: core.h:38
utf_error get_sequence_1(octet_iterator &it, octet_iterator end, uint32_t &code_point)
get_sequence_x functions decode utf-8 sequences of the length x
Definition: core.h:153
bool is_code_point_valid(u32 cp)
Definition: core.h:93
const uint32_t SURROGATE_OFFSET
Definition: core.h:53
bool is_valid(octet_iterator start, octet_iterator end)
Definition: core.h:300
utf_error increase_safely(octet_iterator &it, octet_iterator end)
Helper for get_sequence_x.
Definition: core.h:138
uint16_t mask16(u16_type oc)
Definition: core.h:64
bool starts_with_bom(octet_iterator it, octet_iterator end)
Definition: core.h:306
utf_error get_sequence_4(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:198
unsigned short uint16_t
Definition: core.h:39
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
Definition: core.h:288
const uint8_t bom[]
The library API - functions intended to be called by the users.
Definition: core.h:285
float length(float v)
Definition: vectorMath.h:208
bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
Definition: core.h:116
Definition: document.h:390
bool is_surrogate(u16 cp)
Definition: core.h:87
utf_error get_sequence_2(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition: core.h:164
unsigned int uint32_t
Definition: core.h:40
bool is_bom(octet_iterator it)
Definition: core.h:317
bool is_trail(octet_type oc)
Definition: core.h:69
const uint16_t LEAD_OFFSET
Definition: core.h:52
const uint16_t LEAD_SURROGATE_MIN
Definition: core.h:48