15 #ifndef RAPIDJSON_ENCODINGS_H_
16 #define RAPIDJSON_ENCODINGS_H_
22 RAPIDJSON_DIAG_OFF(4244)
23 RAPIDJSON_DIAG_OFF(4702)
24 #elif defined(__GNUC__)
26 RAPIDJSON_DIAG_OFF(effc++)
27 RAPIDJSON_DIAG_OFF(overflow)
95 template<
typename CharType =
char>
101 template<
typename OutputStream>
102 static void Encode(OutputStream& os,
unsigned codepoint) {
103 if (codepoint <= 0x7F)
104 os.Put(static_cast<Ch>(codepoint & 0xFF));
105 else if (codepoint <= 0x7FF) {
106 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
107 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
109 else if (codepoint <= 0xFFFF) {
110 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
111 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
112 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
116 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
117 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
118 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
123 template <
typename InputStream>
124 static bool Decode(InputStream& is,
unsigned* codepoint) {
125 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
126 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
127 #define TAIL() COPY(); TRANS(0x70)
130 *codepoint = (
unsigned char)c;
134 unsigned char type =
GetRange((
unsigned char)c);
135 *codepoint = (0xFF >> type) & (
unsigned char)c;
138 case 2:
TAIL();
return result;
139 case 3:
TAIL();
TAIL();
return result;
145 default:
return false;
152 template <
typename InputStream,
typename OutputStream>
153 static bool Validate(InputStream& is, OutputStream& os) {
154 #define COPY() os.Put(c = is.Take())
155 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
156 #define TAIL() COPY(); TRANS(0x70)
163 switch (
GetRange((
unsigned char)c)) {
164 case 2:
TAIL();
return result;
165 case 3:
TAIL();
TAIL();
return result;
171 default:
return false;
181 static const unsigned char type[] = {
182 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
183 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
185 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
186 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
187 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
188 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
189 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
190 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
191 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
196 template <
typename InputByteStream>
197 static CharType
TakeBOM(InputByteStream& is) {
200 if ((
unsigned char)c != 0xEFu)
return c;
202 if ((
unsigned char)c != 0xBBu)
return c;
204 if ((
unsigned char)c != 0xBFu)
return c;
209 template <
typename InputByteStream>
210 static Ch
Take(InputByteStream& is) {
215 template <
typename OutputByteStream>
216 static void PutBOM(OutputByteStream& os) {
218 os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
221 template <
typename OutputByteStream>
222 static void Put(OutputByteStream& os, Ch c) {
224 os.Put(static_cast<typename OutputByteStream::Ch>(c));
240 template<
typename CharType =
wchar_t>
247 template<
typename OutputStream>
248 static void Encode(OutputStream& os,
unsigned codepoint) {
250 if (codepoint <= 0xFFFF) {
252 os.Put(static_cast<typename OutputStream::Ch>(codepoint));
256 unsigned v = codepoint - 0x10000;
257 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
258 os.Put((v & 0x3FF) | 0xDC00);
262 template <
typename InputStream>
263 static bool Decode(InputStream& is,
unsigned* codepoint) {
266 if (c < 0xD800 || c > 0xDFFF) {
270 else if (c <= 0xDBFF) {
271 *codepoint = (c & 0x3FF) << 10;
273 *codepoint |= (c & 0x3FF);
274 *codepoint += 0x10000;
275 return c >= 0xDC00 && c <= 0xDFFF;
280 template <
typename InputStream,
typename OutputStream>
281 static bool Validate(InputStream& is, OutputStream& os) {
285 os.Put(c = is.Take());
286 if (c < 0xD800 || c > 0xDFFF)
288 else if (c <= 0xDBFF) {
289 os.Put(c = is.Take());
290 return c >= 0xDC00 && c <= 0xDFFF;
297 template<
typename CharType =
wchar_t>
299 template <
typename InputByteStream>
300 static CharType
TakeBOM(InputByteStream& is) {
302 CharType c =
Take(is);
303 return (
unsigned short)c == 0xFEFFu ?
Take(is) : c;
306 template <
typename InputByteStream>
307 static CharType
Take(InputByteStream& is) {
309 CharType c = (
unsigned char)is.Take();
310 c |= (
unsigned char)is.Take() << 8;
314 template <
typename OutputByteStream>
315 static void PutBOM(OutputByteStream& os) {
317 os.Put(0xFFu); os.Put(0xFEu);
320 template <
typename OutputByteStream>
321 static void Put(OutputByteStream& os, CharType c) {
324 os.Put((c >> 8) & 0xFFu);
329 template<
typename CharType =
wchar_t>
331 template <
typename InputByteStream>
332 static CharType
TakeBOM(InputByteStream& is) {
334 CharType c =
Take(is);
335 return (
unsigned short)c == 0xFEFFu ?
Take(is) : c;
338 template <
typename InputByteStream>
339 static CharType
Take(InputByteStream& is) {
341 CharType c = (
unsigned char)is.Take() << 8;
342 c |= (
unsigned char)is.Take();
346 template <
typename OutputByteStream>
347 static void PutBOM(OutputByteStream& os) {
349 os.Put(0xFEu); os.Put(0xFFu);
352 template <
typename OutputByteStream>
353 static void Put(OutputByteStream& os, CharType c) {
355 os.Put((c >> 8) & 0xFFu);
371 template<
typename CharType =
unsigned>
378 template<
typename OutputStream>
379 static void Encode(OutputStream& os,
unsigned codepoint) {
385 template <
typename InputStream>
386 static bool Decode(InputStream& is,
unsigned* codepoint) {
390 return c <= 0x10FFFF;
393 template <
typename InputStream,
typename OutputStream>
394 static bool Validate(InputStream& is, OutputStream& os) {
397 os.Put(c = is.Take());
398 return c <= 0x10FFFF;
403 template<
typename CharType =
unsigned>
405 template <
typename InputByteStream>
406 static CharType
TakeBOM(InputByteStream& is) {
408 CharType c =
Take(is);
409 return (
unsigned)c == 0x0000FEFFu ?
Take(is) : c;
412 template <
typename InputByteStream>
413 static CharType
Take(InputByteStream& is) {
415 CharType c = (
unsigned char)is.Take();
416 c |= (
unsigned char)is.Take() << 8;
417 c |= (
unsigned char)is.Take() << 16;
418 c |= (
unsigned char)is.Take() << 24;
422 template <
typename OutputByteStream>
423 static void PutBOM(OutputByteStream& os) {
425 os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u);
428 template <
typename OutputByteStream>
429 static void Put(OutputByteStream& os, CharType c) {
432 os.Put((c >> 8) & 0xFFu);
433 os.Put((c >> 16) & 0xFFu);
434 os.Put((c >> 24) & 0xFFu);
439 template<
typename CharType =
unsigned>
441 template <
typename InputByteStream>
442 static CharType
TakeBOM(InputByteStream& is) {
444 CharType c =
Take(is);
445 return (
unsigned)c == 0x0000FEFFu ?
Take(is) : c;
448 template <
typename InputByteStream>
449 static CharType
Take(InputByteStream& is) {
451 CharType c = (
unsigned char)is.Take() << 24;
452 c |= (
unsigned char)is.Take() << 16;
453 c |= (
unsigned char)is.Take() << 8;
454 c |= (
unsigned char)is.Take();
458 template <
typename OutputByteStream>
459 static void PutBOM(OutputByteStream& os) {
461 os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu);
464 template <
typename OutputByteStream>
465 static void Put(OutputByteStream& os, CharType c) {
467 os.Put((c >> 24) & 0xFFu);
468 os.Put((c >> 16) & 0xFFu);
469 os.Put((c >> 8) & 0xFFu);
482 template<
typename CharType =
char>
488 template<
typename OutputStream>
489 static void Encode(OutputStream& os,
unsigned codepoint) {
491 os.Put(static_cast<Ch>(codepoint & 0xFF));
494 template <
typename InputStream>
495 static bool Decode(InputStream& is,
unsigned* codepoint) {
496 unsigned char c =
static_cast<unsigned char>(is.Take());
501 template <
typename InputStream,
typename OutputStream>
502 static bool Validate(InputStream& is, OutputStream& os) {
503 unsigned char c = is.Take();
508 template <
typename InputByteStream>
509 static CharType
TakeBOM(InputByteStream& is) {
515 template <
typename InputByteStream>
516 static Ch
Take(InputByteStream& is) {
521 template <
typename OutputByteStream>
522 static void PutBOM(OutputByteStream& os) {
527 template <
typename OutputByteStream>
528 static void Put(OutputByteStream& os, Ch c) {
530 os.Put(static_cast<typename OutputByteStream::Ch>(c));
549 template<
typename CharType>
555 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
557 template<
typename OutputStream>
558 RAPIDJSON_FORCEINLINE
static void Encode(OutputStream& os,
unsigned codepoint) {
559 typedef void (*EncodeFunc)(OutputStream&, unsigned);
561 (*f[os.GetType()])(os, codepoint);
564 template <
typename InputStream>
565 RAPIDJSON_FORCEINLINE
static bool Decode(InputStream& is,
unsigned* codepoint) {
566 typedef bool (*DecodeFunc)(InputStream&,
unsigned*);
568 return (*f[is.GetType()])(is, codepoint);
571 template <
typename InputStream,
typename OutputStream>
572 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
573 typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
575 return (*f[is.GetType()])(is, os);
578 #undef RAPIDJSON_ENCODINGS_FUNC
585 template<
typename SourceEncoding,
typename TargetEncoding>
588 template<
typename InputStream,
typename OutputStream>
589 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream& is, OutputStream& os) {
591 if (!SourceEncoding::Decode(is, &codepoint))
593 TargetEncoding::Encode(os, codepoint);
598 template<
typename InputStream,
typename OutputStream>
599 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
605 template<
typename Encoding>
607 template<
typename InputStream,
typename OutputStream>
608 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream& is, OutputStream& os) {
613 template<
typename InputStream,
typename OutputStream>
614 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
615 return Encoding::Validate(is, os);
621 #if defined(__GNUC__) || defined(_MSC_VER) // _MSC_VER is MSVC's predefined macro; the former spelling _MSV_VER is never defined, so this branch was skipped on MSVC
625 #endif // RAPIDJSON_ENCODINGS_H_
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:124
Encoding conversion.
Definition: encodings.h:586
static void Put(OutputByteStream &os, Ch c)
Definition: encodings.h:222
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >=2)
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:406
UTFType
Runtime-specified UTF encoding type of a stream.
Definition: encodings.h:538
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
ASCII encoding.
Definition: encodings.h:483
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:489
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:119
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:197
CharType Ch
Definition: encodings.h:97
UTF-32 big endian.
Definition: encodings.h:543
UTF-16 little endian.
Definition: encodings.h:540
UTF-8.
Definition: encodings.h:539
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:153
UTF-16 encoding.
Definition: encodings.h:241
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:375
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:509
static void Put(OutputByteStream &os, Ch c)
Definition: encodings.h:528
UTF-16 big endian.
Definition: encodings.h:541
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:394
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:502
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:522
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:116
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:216
CharType Ch
Definition: encodings.h:373
#define RAPIDJSON_ENCODINGS_FUNC(x)
Definition: encodings.h:555
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:429
static Ch Take(InputByteStream &is)
Definition: encodings.h:210
#define bool
Definition: CascPort.h:16
static CharType Take(InputByteStream &is)
Definition: encodings.h:307
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:321
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:300
static CharType Take(InputByteStream &is)
Definition: encodings.h:339
static CharType Take(InputByteStream &is)
Definition: encodings.h:413
UTF-8 encoding.
Definition: encodings.h:96
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:495
static CharType Take(InputByteStream &is)
Definition: encodings.h:449
static Ch Take(InputByteStream &is)
Definition: encodings.h:516
Dynamically select encoding according to stream's runtime-specified UTF encoding type.
Definition: encodings.h:550
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
Definition: encodings.h:589
Definition: encodings.h:245
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:353
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:178
Definition: encodings.h:376
Definition: encodings.h:553
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:315
Definition: encodings.h:99
UTF-32 encoding.
Definition: encodings.h:372
CharType Ch
Definition: encodings.h:242
UTF-16 big endian encoding.
Definition: encodings.h:330
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Definition: encodings.h:608
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:379
UTF-32 big endian encoding.
Definition: encodings.h:440
CharType Ch
Definition: encodings.h:551
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:248
static RAPIDJSON_FORCEINLINE bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:565
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:263
common definitions and configuration
static RAPIDJSON_FORCEINLINE void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:558
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
Definition: encodings.h:599
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:442
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:465
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:614
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:423
UTF-32 little endian.
Definition: encodings.h:542
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:386
CharType Ch
Definition: encodings.h:484
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:459
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:102
Definition: encodings.h:486
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >=4)
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:347
UTF-16 little endian encoding.
Definition: encodings.h:298
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:281
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:332
UTF-32 little endian encoding.
Definition: encodings.h:404
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:572