TrinityCore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
AutoUTFInputStream< CharType, InputByteStream > Class Template Reference

Input stream wrapper with dynamically bound encoding and automatic encoding detection. More...

#include <encodedstream.h>

Public Types

typedef CharType Ch
 

Public Member Functions

 AutoUTFInputStream (InputByteStream &is, UTFType type=kUTF8)
 Constructor. More...
 
UTFType GetType () const
 
bool HasBOM () const
 
Ch Peek () const
 
Ch Take ()
 
size_t Tell () const
 
void Put (Ch)
 
void Flush ()
 
Ch * PutBegin ()
 
size_t PutEnd (Ch *)
 

Private Types

typedef Ch(* TakeFunc )(InputByteStream &is)
 

Private Member Functions

 RAPIDJSON_STATIC_ASSERT (sizeof(typename InputByteStream::Ch)==1)
 
 AutoUTFInputStream (const AutoUTFInputStream &)
 
AutoUTFInputStream & operator= (const AutoUTFInputStream &)
 
void DetectType ()
 

Private Attributes

InputByteStream * is_
 
UTFType type_
 
Ch current_
 
TakeFunc takeFunc_
 
bool hasBOM_
 

Detailed Description

template<typename CharType, typename InputByteStream>
class AutoUTFInputStream< CharType, InputByteStream >

Input stream wrapper with dynamically bound encoding and automatic encoding detection.

Template Parameters
CharType: Type of character for reading.
InputByteStream: Type of input byte stream to be wrapped.

Member Typedef Documentation

template<typename CharType , typename InputByteStream >
typedef CharType AutoUTFInputStream< CharType, InputByteStream >::Ch
template<typename CharType , typename InputByteStream >
typedef Ch(* AutoUTFInputStream< CharType, InputByteStream >::TakeFunc)(InputByteStream &is)
private

Constructor & Destructor Documentation

template<typename CharType , typename InputByteStream >
AutoUTFInputStream< CharType, InputByteStream >::AutoUTFInputStream ( InputByteStream &  is,
UTFType  type = kUTF8 
)
inline

Constructor.

Parameters
is: input stream to be wrapped.
type: UTF encoding type if it is not detected from the stream.
111  : is_(&is), type_(type), hasBOM_(false) {
112  RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
113  DetectType();
114  static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
115  takeFunc_ = f[type_];
116  current_ = takeFunc_(*is_);
117  }
#define RAPIDJSON_ENCODINGS_FUNC(x)
Definition: encodedstream.h:93
UTFType type_
Definition: encodedstream.h:187
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
Ch(* TakeFunc)(InputByteStream &is)
Definition: encodedstream.h:185
UTF-32 big endian.
Definition: encodings.h:543
bool hasBOM_
Definition: encodedstream.h:190
UTF-8.
Definition: encodings.h:539
Ch Take()
Definition: encodedstream.h:123
void DetectType()
Definition: encodedstream.h:137
InputByteStream * is_
Definition: encodedstream.h:186
TakeFunc takeFunc_
Definition: encodedstream.h:189
Ch current_
Definition: encodedstream.h:188

+ Here is the call graph for this function:

template<typename CharType , typename InputByteStream >
AutoUTFInputStream< CharType, InputByteStream >::AutoUTFInputStream ( const AutoUTFInputStream< CharType, InputByteStream > &  )
private

Member Function Documentation

template<typename CharType , typename InputByteStream >
void AutoUTFInputStream< CharType, InputByteStream >::DetectType ( )
inline private
137  {
138  // BOM (Byte Order Mark):
139  // 00 00 FE FF UTF-32BE
140  // FF FE 00 00 UTF-32LE
141  // FE FF UTF-16BE
142  // FF FE UTF-16LE
143  // EF BB BF UTF-8
144 
145  const unsigned char* c = (const unsigned char *)is_->Peek4();
146  if (!c)
147  return;
148 
149  unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
150  hasBOM_ = false;
151  if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
152  else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
153  else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
154  else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
155  else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
156 
157  // RFC 4627: Section 3
158  // "Since the first two characters of a JSON text will always be ASCII
159  // characters [RFC0020], it is possible to determine whether an octet
160  // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
161  // at the pattern of nulls in the first four octets."
162  // 00 00 00 xx UTF-32BE
163  // 00 xx 00 xx UTF-16BE
164  // xx 00 00 00 UTF-32LE
165  // xx 00 xx 00 UTF-16LE
166  // xx xx xx xx UTF-8
167 
168  if (!hasBOM_) {
169  unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
170  switch (pattern) {
171  case 0x08: type_ = kUTF32BE; break;
172  case 0x0A: type_ = kUTF16BE; break;
173  case 0x01: type_ = kUTF32LE; break;
174  case 0x05: type_ = kUTF16LE; break;
175  case 0x0F: type_ = kUTF8; break;
176  default: break; // Use type defined by user.
177  }
178  }
179 
180  // Runtime check whether the size of character type is sufficient. It only performs the check with assertions.
181  if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
182  if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
183  }
UTFType type_
Definition: encodedstream.h:187
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
UTF-32 big endian.
Definition: encodings.h:543
bool hasBOM_
Definition: encodedstream.h:190
UTF-16 little endian.
Definition: encodings.h:540
UTF-8.
Definition: encodings.h:539
UTF-16 big endian.
Definition: encodings.h:541
InputByteStream * is_
Definition: encodedstream.h:186
const uint8_t bom[]
The library API - functions intended to be called by the users.
Definition: core.h:285
CharType Ch
Definition: encodedstream.h:104
UTF-32 little endian.
Definition: encodings.h:542

+ Here is the caller graph for this function:

template<typename CharType , typename InputByteStream >
void AutoUTFInputStream< CharType, InputByteStream >::Flush ( )
inline
128 { RAPIDJSON_ASSERT(false); }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
template<typename CharType , typename InputByteStream >
UTFType AutoUTFInputStream< CharType, InputByteStream >::GetType ( ) const
inline
119 { return type_; }
UTFType type_
Definition: encodedstream.h:187
template<typename CharType , typename InputByteStream >
bool AutoUTFInputStream< CharType, InputByteStream >::HasBOM ( ) const
inline
120 { return hasBOM_; }
bool hasBOM_
Definition: encodedstream.h:190
template<typename CharType , typename InputByteStream >
AutoUTFInputStream& AutoUTFInputStream< CharType, InputByteStream >::operator= ( const AutoUTFInputStream< CharType, InputByteStream > &  )
private
template<typename CharType , typename InputByteStream >
Ch AutoUTFInputStream< CharType, InputByteStream >::Peek ( ) const
inline
122 { return current_; }
Ch current_
Definition: encodedstream.h:188
template<typename CharType , typename InputByteStream >
void AutoUTFInputStream< CharType, InputByteStream >::Put ( Ch  )
inline
127 { RAPIDJSON_ASSERT(false); }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
template<typename CharType , typename InputByteStream >
Ch* AutoUTFInputStream< CharType, InputByteStream >::PutBegin ( )
inline
129 { RAPIDJSON_ASSERT(false); return 0; }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
template<typename CharType , typename InputByteStream >
size_t AutoUTFInputStream< CharType, InputByteStream >::PutEnd ( Ch )
inline
130 { RAPIDJSON_ASSERT(false); return 0; }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:344
template<typename CharType , typename InputByteStream >
AutoUTFInputStream< CharType, InputByteStream >::RAPIDJSON_STATIC_ASSERT ( sizeof(typename InputByteStream::Ch) == 1 )
private
template<typename CharType , typename InputByteStream >
Ch AutoUTFInputStream< CharType, InputByteStream >::Take ( )
inline
123 { Ch c = current_; current_ = takeFunc_(*is_); return c; }
InputByteStream * is_
Definition: encodedstream.h:186
CharType Ch
Definition: encodedstream.h:104
TakeFunc takeFunc_
Definition: encodedstream.h:189
Ch current_
Definition: encodedstream.h:188

+ Here is the caller graph for this function:

template<typename CharType , typename InputByteStream >
size_t AutoUTFInputStream< CharType, InputByteStream >::Tell ( ) const
inline
124 { return is_->Tell(); }
InputByteStream * is_
Definition: encodedstream.h:186

Member Data Documentation

template<typename CharType , typename InputByteStream >
Ch AutoUTFInputStream< CharType, InputByteStream >::current_
private
template<typename CharType , typename InputByteStream >
bool AutoUTFInputStream< CharType, InputByteStream >::hasBOM_
private
template<typename CharType , typename InputByteStream >
InputByteStream* AutoUTFInputStream< CharType, InputByteStream >::is_
private
template<typename CharType , typename InputByteStream >
TakeFunc AutoUTFInputStream< CharType, InputByteStream >::takeFunc_
private
template<typename CharType , typename InputByteStream >
UTFType AutoUTFInputStream< CharType, InputByteStream >::type_
private

The documentation for this class was generated from the following file: