Boost.Locale
utf8_codecvt.hpp
1 //
2 // Copyright (c) 2015 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0.
5 // https://www.boost.org/LICENSE_1_0.txt
6 
7 #ifndef BOOST_LOCALE_UTF8_CODECVT_HPP
8 #define BOOST_LOCALE_UTF8_CODECVT_HPP
9 
10 #include <boost/locale/generic_codecvt.hpp>
11 #include <boost/locale/utf.hpp>
12 #include <boost/cstdint.hpp>
13 #include <locale>
14 
15 namespace boost { namespace locale {
16 
19  template<typename CharType>
20  class utf8_codecvt : public generic_codecvt<CharType, utf8_codecvt<CharType>> {
21  public:
22  struct state_type {};
23 
24  utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType, utf8_codecvt<CharType>>(refs) {}
25 
26  static int max_encoding_length() { return 4; }
27 
28  static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
29  {
30  return state_type();
31  }
32  static utf::code_point to_unicode(state_type&, const char*& begin, const char* end)
33  {
34  const char* p = begin;
35 
37  if(c != utf::illegal && c != utf::incomplete)
38  begin = p;
39  return c;
40  }
41 
42  static utf::code_point from_unicode(state_type&, utf::code_point u, char* begin, const char* end)
43  {
45  return utf::illegal;
46  int width;
47  if((width = utf::utf_traits<char>::width(u)) > end - begin)
48  return utf::incomplete;
50  return width;
51  }
52  };
53 
54 }} // namespace boost::locale
55 
56 #endif
static code_point decode(Iterator &p, Iterator e)
bool is_valid_codepoint(code_point v)
the function checks if v is a valid code point
Definition: utf.hpp:27
static Iterator encode(code_point value, Iterator out)
Generic UTF-8 codecvt facet; it allows converting UTF-8 strings to UTF-16 and UTF-32 using wchar_t,...
Definition: utf8_codecvt.hpp:20
uint32_t code_point
The integral type that can hold a Unicode code point.
Definition: utf.hpp:19
initial_convertion_state
Initial state for converting to or from unicode code points, used by initial_state in derived classes...
Definition: generic_codecvt.hpp:33
static int width(code_point value)
Definition: utf8_codecvt.hpp:22
Generic codecvt facet for various stateless encodings to UTF-16 and UTF-32 using wchar_t,...
Definition: generic_codecvt.hpp:143
constexpr code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:22
constexpr code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:24