RTBKit  0.9
Open-source framework to create real-time ad bidding systems.
soa/types/json_parsing.cc
00001 /* json_parsing.cc
00002    Jeremy Barnes, 8 March 2013
00003    Copyright (c) 2013 Datacratic Inc.  All rights reserved.
00004 
00005 */
00006 
00007 #include "json_parsing.h"
00008 #include "string.h"
00009 
00010 using namespace std;
00011 using namespace ML;
00012 
00013 namespace Datacratic {
00014 
00015 Utf8String
00016 StreamingJsonParsingContext::
00017 expectStringUtf8()
00018 {
00019     skipJsonWhitespace((*context));
00020     context->expect_literal('"');
00021 
00022     char internalBuffer[4096];
00023 
00024     char * buffer = internalBuffer;
00025     size_t bufferSize = 4096;
00026     size_t pos = 0;
00027 
00028     // Keep expanding until it fits
00029     while (!context->match_literal('"')) {
00030         // We need up to 4 characters to add a new UTF-8 code point
00031         if (pos >= bufferSize - 4) {
00032             size_t newBufferSize = bufferSize * 8;
00033             char * newBuffer = new char[newBufferSize];
00034             std::copy(buffer, buffer + bufferSize, newBuffer);
00035             if (buffer != internalBuffer)
00036                 delete[] buffer;
00037             buffer = newBuffer;
00038             bufferSize = newBufferSize;
00039         }
00040 
00041         int c = *(*context);
00042         
00043         //cerr << "c = " << c << " " << (char)c << endl;
00044 
00045         if (c < 0 || c > 127) {
00046             // Unicode
00047             c = utf8::unchecked::next(*context);
00048 
00049             char * p1 = buffer + pos;
00050             char * p2 = p1;
00051             pos += utf8::append(c, p2) - p1;
00052 
00053             continue;
00054         }
00055         ++(*context);
00056 
00057         if (c == '\\') {
00058             c = *(*context)++;
00059             switch (c) {
00060             case 't': c = '\t';  break;
00061             case 'n': c = '\n';  break;
00062             case 'r': c = '\r';  break;
00063             case 'f': c = '\f';  break;
00064             case '/': c = '/';   break;
00065             case '\\':c = '\\';  break;
00066             case '"': c = '"';   break;
00067             case 'u': {
00068                 int code = 0;
00069                 for (unsigned i = 0;  i < 4;  ++i) {
00070                     int c = *(*context)++;
00071                     int digit;
00072                     if (c >= '0' && c <= '9')
00073                         digit = c - '0';
00074                     else if (c >= 'a' && c <= 'f')
00075                         digit = c - 'a';
00076                     else if (c >= 'A' && c <= 'F')
00077                         digit = c - 'A';
00078                     else context->exception("invalid hexadecimal in code");
00079 
00080                     code = (code << 4) | digit;
00081                 }
00082                 //cerr << "code = " << code << endl;
00083                 c = code;
00084                 break;
00085             }
00086             default:
00087                 context->exception("invalid escaped char");
00088             }
00089         }
00090 
00091         if (c < ' ' || c >= 127) {
00092             char * p1 = buffer + pos;
00093             char * p2 = p1;
00094             pos += utf8::append(c, p2) - p1;
00095         }
00096         else buffer[pos++] = c;
00097     }
00098 
00099     Utf8String result(string(buffer, buffer + pos));
00100     if (buffer != internalBuffer)
00101         delete[] buffer;
00102     
00103     return result;
00104 }
00105 
00106 
00107 }  // namespace Datacratic
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator