RTBKit 0.9

Open-source framework to create real-time ad bidding systems.
00001 /* json_parsing.cc 00002 Jeremy Barnes, 8 March 2013 00003 Copyright (c) 2013 Datacratic Inc. All rights reserved. 00004 00005 */ 00006 00007 #include "json_parsing.h" 00008 #include "string.h" 00009 00010 using namespace std; 00011 using namespace ML; 00012 00013 namespace Datacratic { 00014 00015 Utf8String 00016 StreamingJsonParsingContext:: 00017 expectStringUtf8() 00018 { 00019 skipJsonWhitespace((*context)); 00020 context->expect_literal('"'); 00021 00022 char internalBuffer[4096]; 00023 00024 char * buffer = internalBuffer; 00025 size_t bufferSize = 4096; 00026 size_t pos = 0; 00027 00028 // Keep expanding until it fits 00029 while (!context->match_literal('"')) { 00030 // We need up to 4 characters to add a new UTF-8 code point 00031 if (pos >= bufferSize - 4) { 00032 size_t newBufferSize = bufferSize * 8; 00033 char * newBuffer = new char[newBufferSize]; 00034 std::copy(buffer, buffer + bufferSize, newBuffer); 00035 if (buffer != internalBuffer) 00036 delete[] buffer; 00037 buffer = newBuffer; 00038 bufferSize = newBufferSize; 00039 } 00040 00041 int c = *(*context); 00042 00043 //cerr << "c = " << c << " " << (char)c << endl; 00044 00045 if (c < 0 || c > 127) { 00046 // Unicode 00047 c = utf8::unchecked::next(*context); 00048 00049 char * p1 = buffer + pos; 00050 char * p2 = p1; 00051 pos += utf8::append(c, p2) - p1; 00052 00053 continue; 00054 } 00055 ++(*context); 00056 00057 if (c == '\\') { 00058 c = *(*context)++; 00059 switch (c) { 00060 case 't': c = '\t'; break; 00061 case 'n': c = '\n'; break; 00062 case 'r': c = '\r'; break; 00063 case 'f': c = '\f'; break; 00064 case '/': c = '/'; break; 00065 case '\\':c = '\\'; break; 00066 case '"': c = '"'; break; 00067 case 'u': { 00068 int code = 0; 00069 for (unsigned i = 0; i < 4; ++i) { 00070 int c = *(*context)++; 00071 int digit; 00072 if (c >= '0' && c <= '9') 00073 digit = c - '0'; 00074 else if (c >= 'a' && c <= 'f') 00075 digit = c - 'a'; 00076 else if (c >= 'A' && c <= 'F') 00077 digit = c 
- 'A'; 00078 else context->exception("invalid hexadecimal in code"); 00079 00080 code = (code << 4) | digit; 00081 } 00082 //cerr << "code = " << code << endl; 00083 c = code; 00084 break; 00085 } 00086 default: 00087 context->exception("invalid escaped char"); 00088 } 00089 } 00090 00091 if (c < ' ' || c >= 127) { 00092 char * p1 = buffer + pos; 00093 char * p2 = p1; 00094 pos += utf8::append(c, p2) - p1; 00095 } 00096 else buffer[pos++] = c; 00097 } 00098 00099 Utf8String result(string(buffer, buffer + pos)); 00100 if (buffer != internalBuffer) 00101 delete[] buffer; 00102 00103 return result; 00104 } 00105 00106 00107 } // namespace Datacratic