TrinityCore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
encodedstream.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_ENCODEDSTREAM_H_
16 #define RAPIDJSON_ENCODEDSTREAM_H_
17 
18 #include "rapidjson.h"
19 
20 #ifdef __GNUC__
21 RAPIDJSON_DIAG_PUSH
22 RAPIDJSON_DIAG_OFF(effc++)
23 #endif
24 
26 
28 
32 template <typename Encoding, typename InputByteStream>
34  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
35 public:
36  typedef typename Encoding::Ch Ch;
37 
38  EncodedInputStream(InputByteStream& is) : is_(is) { // Wraps `is`; the stream is held by reference, not copied.
39  current_ = Encoding::TakeBOM(is_); // The value returned by TakeBOM becomes the first character reported by Peek(). NOTE(review): presumably TakeBOM skips a leading BOM - confirm in encodings.h.
40  }
41 
42  Ch Peek() const { return current_; } // Returns the buffered current character without consuming it.
43  Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } // Returns the buffered character and pre-reads the next one from is_.
44  size_t Tell() const { return is_.Tell(); } // Forwards to the wrapped byte stream's Tell().
45 
46  // Not implemented
47  void Put(Ch) { RAPIDJSON_ASSERT(false); } // Writing is unsupported on an input stream; asserts if called.
48  void Flush() { RAPIDJSON_ASSERT(false); } // Writing is unsupported on an input stream; asserts if called.
49  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an input stream; asserts, then returns a null pointer.
50  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an input stream; asserts, then returns 0.
51 
52 private:
55 
56  InputByteStream& is_;
58 };
59 
61 
65 template <typename Encoding, typename OutputByteStream>
67  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
68 public:
69  typedef typename Encoding::Ch Ch;
70 
71  EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { // Wraps `os` by reference; putBOM selects whether a BOM is written first.
72  if (putBOM)
73  Encoding::PutBOM(os_); // Emitted once, at construction, before any character is written through Put().
74  }
75 
76  void Put(Ch c) { Encoding::Put(os_, c); } // Writes character c to os_ through the statically bound Encoding.
77  void Flush() { os_.Flush(); } // Forwards to the wrapped byte stream's Flush().
78 
79  // Not implemented
80  Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an output stream: asserts. The explicit return keeps the function well-defined when the assertion does not abort.
81  Ch Take() { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an output stream: asserts. The explicit return keeps the function well-defined when the assertion does not abort.
82  size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an output stream; asserts, then returns 0.
83  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } // Unsupported here; asserts, then returns a null pointer.
84  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } // Unsupported here; asserts, then returns 0.
85 
86 private:
89 
90  OutputByteStream& os_;
91 };
92 
93 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x // Lists member x of the five UTF encodings; used to fill tables indexed by type_, so the order presumably mirrors the UTFType enumerators - confirm in encodings.h.
94 
96 
100 template <typename CharType, typename InputByteStream>
102  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
103 public:
104  typedef CharType Ch;
105 
107 
111  AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { // `type` is the fallback encoding, kept when DetectType() cannot decide.
112  RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
113  DetectType(); // May overwrite type_ and set hasBOM_.
114  static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; // One Take function per encoding, selected by type_ on the next line.
115  takeFunc_ = f[type_];
116  current_ = takeFunc_(*is_); // Pre-read the first character so Peek() has a value straight away.
117  }
118 
119  UTFType GetType() const { return type_; } // Encoding in effect after detection in the constructor.
120  bool HasBOM() const { return hasBOM_; } // True when DetectType() recognised a BOM at the start of the stream.
121 
122  Ch Peek() const { return current_; } // Returns the buffered current character without consuming it.
123  Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } // Returns the buffered character and pre-reads the next one through the selected Take function.
124  size_t Tell() const { return is_->Tell(); } // Forwards to the wrapped byte stream's Tell().
125 
126  // Not implemented
127  void Put(Ch) { RAPIDJSON_ASSERT(false); } // Writing is unsupported on an input stream; asserts if called.
128  void Flush() { RAPIDJSON_ASSERT(false); } // Writing is unsupported on an input stream; asserts if called.
129  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an input stream; asserts, then returns a null pointer.
130  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an input stream; asserts, then returns 0.
131 
132 private:
135 
136  // Detect the encoding from a leading BOM or, failing that, from the RFC 4627 null-byte pattern. Updates type_ and hasBOM_; consumes the BOM bytes when one is found.
137  void DetectType() {
138  // BOM (Byte Order Mark):
139  // 00 00 FE FF UTF-32BE
140  // FF FE 00 00 UTF-32LE
141  // FE FF UTF-16BE
142  // FF FE UTF-16LE
143  // EF BB BF UTF-8
144 
145  const unsigned char* c = reinterpret_cast<const unsigned char*>(is_->Peek4());
146  if (!c) // Peek4() returned null, so there is nothing to inspect: keep the caller-supplied type_.
147  return;
148  // Widen each byte to unsigned before shifting, so that a first-four-bytes value with c[3] >= 0x80 never shifts into the sign bit of a promoted int.
149  unsigned bom = static_cast<unsigned>(c[0]) | (static_cast<unsigned>(c[1]) << 8) | (static_cast<unsigned>(c[2]) << 16) | (static_cast<unsigned>(c[3]) << 24);
150  hasBOM_ = false;
151  if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
152  else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
153  else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
154  else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
155  else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
156 
157  // RFC 4627: Section 3
158  // "Since the first two characters of a JSON text will always be ASCII
159  // characters [RFC0020], it is possible to determine whether an octet
160  // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
161  // at the pattern of nulls in the first four octets."
162  // 00 00 00 xx UTF-32BE
163  // 00 xx 00 xx UTF-16BE
164  // xx 00 00 00 UTF-32LE
165  // xx 00 xx 00 UTF-16LE
166  // xx xx xx xx UTF-8
167 
168  if (!hasBOM_) {
169  unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); // Bit i is set when byte i is non-zero.
170  switch (pattern) {
171  case 0x08: type_ = kUTF32BE; break;
172  case 0x0A: type_ = kUTF16BE; break;
173  case 0x01: type_ = kUTF32LE; break;
174  case 0x05: type_ = kUTF16LE; break;
175  case 0x0F: type_ = kUTF8; break;
176  default: break; // Use type defined by user.
177  }
178  }
179 
180  // Runtime check that the character type is wide enough for the detected encoding. This is enforced by assertion only.
181  if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
182  if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
183  }
184 
185  typedef Ch (*TakeFunc)(InputByteStream& is);
186  InputByteStream* is_;
190  bool hasBOM_;
191 };
192 
194 
198 template <typename CharType, typename OutputByteStream>
200  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
201 public:
202  typedef CharType Ch;
203 
205 
210  AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { // Binds `os` to the encoding chosen at run time by `type`; putBOM selects whether a BOM is written first.
211  RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
212 
213  // Runtime check that the character type is wide enough for the requested encoding. This is enforced by assertion only.
214  if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
215  if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
216 
217  static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; // One Put function per encoding, selected by type_ on the next line.
218  putFunc_ = f[type_];
219 
220  if (putBOM)
221  PutBOM();
222  }
223 
224  UTFType GetType() const { return type_; } // Encoding passed to the constructor.
225 
226  void Put(Ch c) { putFunc_(*os_, c); } // Writes character c to *os_ through the Put function selected in the constructor.
227  void Flush() { os_->Flush(); } // Forwards to the wrapped byte stream's Flush().
228 
229  // Not implemented
230  Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an output stream: asserts. The explicit return keeps the function well-defined when the assertion does not abort.
231  Ch Take() { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an output stream: asserts. The explicit return keeps the function well-defined when the assertion does not abort.
232  size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } // Unsupported on an output stream; asserts, then returns 0.
233  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } // Unsupported here; asserts, then returns a null pointer.
234  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } // Unsupported here; asserts, then returns 0.
235 
236 private:
239 
240  void PutBOM() { // Writes the BOM of the encoding selected by type_ to *os_.
241  typedef void (*PutBOMFunc)(OutputByteStream&);
242  static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; // One PutBOM function per encoding, indexed by type_.
243  f[type_](*os_);
244  }
245 
246  typedef void (*PutFunc)(OutputByteStream&, Ch);
247 
248  OutputByteStream* os_;
251 };
252 
253 #undef RAPIDJSON_ENCODINGS_FUNC
254 
256 
257 #ifdef __GNUC__
258 RAPIDJSON_DIAG_POP
259 #endif
260 
261 #endif // RAPIDJSON_ENCODEDSTREAM_H_
#define RAPIDJSON_ENCODINGS_FUNC(x)
Definition: encodedstream.h:93
CharType Ch
Definition: encodedstream.h:202
UTFType type_
Definition: encodedstream.h:187
UTFType
Runtime-specified UTF encoding type of a stream.
Definition: encodings.h:538
bool HasBOM() const
Definition: encodedstream.h:120
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
Ch * PutBegin()
Definition: encodedstream.h:83
Ch(* TakeFunc)(InputByteStream &is)
Definition: encodedstream.h:185
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:119
void Flush()
Definition: encodedstream.h:77
Ch * PutBegin()
Definition: encodedstream.h:49
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch)==1)
OutputByteStream * os_
Definition: encodedstream.h:248
UTF-32 big endian.
Definition: encodings.h:543
Ch Take()
Definition: encodedstream.h:81
bool hasBOM_
Definition: encodedstream.h:190
UTFType GetType() const
Definition: encodedstream.h:224
UTF-16 little endian.
Definition: encodings.h:540
EncodedInputStream & operator=(const EncodedInputStream &)
void Put(Ch c)
Definition: encodedstream.h:76
Output byte stream wrapper with statically bound encoding.
Definition: encodedstream.h:66
UTF-8.
Definition: encodings.h:539
AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM)
Constructor.
Definition: encodedstream.h:210
Ch Take()
Definition: encodedstream.h:123
#define false
Definition: CascPort.h:18
size_t PutEnd(Ch *)
Definition: encodedstream.h:234
Encoding::Ch Ch
Definition: encodedstream.h:36
void DetectType()
Definition: encodedstream.h:137
void Put(Ch)
Definition: encodedstream.h:47
UTF-16 big endian.
Definition: encodings.h:541
EncodedOutputStream(OutputByteStream &os, bool putBOM=true)
Definition: encodedstream.h:71
Ch Peek() const
Definition: encodedstream.h:80
PutFunc putFunc_
Definition: encodedstream.h:250
EncodedOutputStream & operator=(const EncodedOutputStream &)
size_t Tell() const
Definition: encodedstream.h:82
Output stream wrapper with dynamically bound encoding and automatic encoding detection.
Definition: encodedstream.h:199
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:116
void Put(Ch)
Definition: encodedstream.h:127
size_t PutEnd(Ch *)
Definition: encodedstream.h:50
Ch Take()
Definition: encodedstream.h:43
EncodedInputStream(InputByteStream &is)
Definition: encodedstream.h:38
Ch current_
Definition: encodedstream.h:57
Ch * PutBegin()
Definition: encodedstream.h:129
void Flush()
Definition: encodedstream.h:128
AutoUTFOutputStream & operator=(const AutoUTFOutputStream &)
Ch Peek() const
Definition: encodedstream.h:230
InputByteStream * is_
Definition: encodedstream.h:186
Ch Peek() const
Definition: encodedstream.h:42
AutoUTFInputStream(InputByteStream &is, UTFType type=kUTF8)
Constructor.
Definition: encodedstream.h:111
UTFType type_
Definition: encodedstream.h:249
Input byte stream wrapper with a statically bound encoding.
Definition: encodedstream.h:33
size_t Tell() const
Definition: encodedstream.h:232
void(* PutFunc)(OutputByteStream &, Ch)
Definition: encodedstream.h:246
const uint8_t bom[]
The library API - functions intended to be called by the users.
Definition: core.h:285
Encoding::Ch Ch
Definition: encodedstream.h:69
void Flush()
Definition: encodedstream.h:48
size_t Tell() const
Definition: encodedstream.h:44
Ch Peek() const
Definition: encodedstream.h:122
Input stream wrapper with dynamically bound encoding and automatic encoding detection.
Definition: encodedstream.h:101
Ch * PutBegin()
Definition: encodedstream.h:233
CharType Ch
Definition: encodedstream.h:104
void Flush()
Definition: encodedstream.h:227
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch)==1)
common definitions and configuration
void Put(Ch c)
Definition: encodedstream.h:226
TakeFunc takeFunc_
Definition: encodedstream.h:189
UTFType GetType() const
Definition: encodedstream.h:119
size_t PutEnd(Ch *)
Definition: encodedstream.h:130
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch)==1)
UTF-32 little endian.
Definition: encodings.h:542
InputByteStream & is_
Definition: encodedstream.h:56
OutputByteStream & os_
Definition: encodedstream.h:90
Ch current_
Definition: encodedstream.h:188
size_t Tell() const
Definition: encodedstream.h:124
void PutBOM()
Definition: encodedstream.h:240
size_t PutEnd(Ch *)
Definition: encodedstream.h:84
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch)==1)
Ch Take()
Definition: encodedstream.h:231
AutoUTFInputStream & operator=(const AutoUTFInputStream &)