RTBKit
0.9
Open-source framework to create real-time ad bidding systems.
|
00001 /* json_parsing.h -*- C++ -*- 00002 Jeremy Barnes, 22 February 2013 00003 Copyright (c) 2013 Datacratic Inc. All rights reserved. 00004 00005 */ 00006 00007 #pragma once 00008 00009 #include "soa/jsoncpp/json.h" 00010 #include "jml/utils/json_parsing.h" 00011 #include "soa/types/id.h" 00012 #include "soa/types/string.h" 00013 00014 namespace Datacratic { 00015 00016 struct JsonParsingContext; 00017 struct ValueDescription; 00018 00019 struct JsonPathEntry { 00020 JsonPathEntry(int index) 00021 : index(index), keyPtr(0), fieldNumber(0) 00022 { 00023 } 00024 00025 JsonPathEntry(std::string key) 00026 : index(-1), key(std::move(key)), keyPtr(this->key.c_str()), 00027 fieldNumber(0) 00028 { 00029 } 00030 00031 JsonPathEntry(const char * keyPtr) 00032 : index(-1), keyPtr(keyPtr) 00033 { 00034 } 00035 00036 int index; 00037 std::string key; 00038 const char * keyPtr; 00039 int fieldNumber; 00040 00041 std::string fieldName() const 00042 { 00043 return key.empty() && keyPtr ? keyPtr : key; 00044 } 00045 00046 const char * fieldNamePtr() const 00047 { 00048 return keyPtr; 00049 } 00050 00051 }; 00052 00053 struct JsonPath: public std::vector<JsonPathEntry> { 00054 std::string print() const 00055 { 00056 std::string result; 00057 for (auto & e: *this) { 00058 if (e.index == -1) 00059 result += "." + e.fieldName(); 00060 else result += '[' + std::to_string(e.index) + ']'; 00061 } 00062 return result; 00063 } 00064 00065 std::string fieldName() const 00066 { 00067 return this->back().fieldName(); 00068 } 00069 00070 const char * fieldNamePtr() const 00071 { 00072 return this->back().fieldNamePtr(); 00073 } 00074 00075 void push(JsonPathEntry entry, int fieldNum = 0) 00076 { 00077 entry.fieldNumber = fieldNum; 00078 this->emplace_back(std::move(entry)); 00079 } 00080 00081 void replace(const JsonPathEntry & entry) 00082 { 00083 int newFieldNumber = this->back().fieldNumber + 1; 00084 this->back() = entry; 00085 this->back().fieldNumber = newFieldNumber; 00086 } 00087 00088 void pop() 00089 { 00090 this->pop_back(); 00091 } 00092 00093 }; 00094 00095 /*****************************************************************************/ 00096 /* JSON PARSING CONTEXT */ 00097 /*****************************************************************************/ 00098 00099 struct JsonParsingContext { 00100 00101 JsonPath path; 00102 00103 std::string printPath() const 00104 { 00105 return path.print(); 00106 } 00107 00108 std::string fieldName() const 00109 { 00110 return path.fieldName(); 00111 } 00112 00113 const char * fieldNamePtr() const 00114 { 00115 return path.fieldNamePtr(); 00116 } 00117 00118 void pushPath(const JsonPathEntry & entry, int memberNumber = 0) 00119 { 00120 path.push(entry, memberNumber); 00121 } 00122 00123 void replacePath(const JsonPathEntry & entry) 00124 { 00125 path.replace(entry); 00126 } 00127 00128 void popPath() 00129 { 00130 path.pop(); 00131 } 00132 00133 typedef std::function<void ()> OnUnknownField; 00134 00135 std::vector<OnUnknownField> onUnknownFieldHandlers; 00136 00137 void onUnknownField() 00138 { 00139 if (!onUnknownFieldHandlers.empty()) 00140 onUnknownFieldHandlers.back()(); 00141 else exception("unknown field " + printPath()); 00142 } 00143 00146 virtual void exception(const std::string & message) = 0; 00147 00148 virtual int expectInt() = 0; 00149 virtual float expectFloat() = 0; 00150 virtual double expectDouble() = 0; 00151 virtual bool expectBool() = 0; 00152 virtual bool matchUnsignedLongLong(unsigned long long & val) = 0; 00153 virtual bool matchLongLong(long long & val) = 0; 00154 virtual std::string expectStringAscii() = 0; 00155 virtual Utf8String expectStringUtf8() = 0; 00156 virtual Json::Value expectJson() = 0; 00157 virtual void expectNull() = 0; 00158 virtual bool isObject() const = 0; 00159 virtual bool isString() const = 0; 00160 virtual bool isArray() const = 0; 00161 virtual bool isBool() const = 0; 00162 virtual bool isNumber() const = 0; 00163 virtual bool isNull() const = 0; 00164 #if 0 00165 virtual bool isInt() const = 0; 00166 #endif 00167 virtual void skip() = 0; 00168 00169 virtual void forEachMember(const std::function<void ()> & fn) = 0; 00170 virtual void forEachElement(const std::function<void ()> & fn) = 0; 00171 }; 00172 00173 00174 /*****************************************************************************/ 00175 /* STREAMING JSON PARSING CONTEXT */ 00176 /*****************************************************************************/ 00177 00184 struct StreamingJsonParsingContext 00185 : public JsonParsingContext { 00186 00187 StreamingJsonParsingContext() 00188 { 00189 } 00190 00191 template<typename... Args> 00192 StreamingJsonParsingContext(Args &&... args) 00193 { 00194 init(std::forward<Args>(args)...); 00195 } 00196 00197 template<typename... Args> 00198 void init(Args &&... args) 00199 { 00200 ownedContext.reset(new ML::Parse_Context(std::forward<Args>(args)...)); 00201 context = ownedContext.get(); 00202 } 00203 00204 void init(ML::Parse_Context & context) 00205 { 00206 this->context = &context; 00207 ownedContext.reset(); 00208 } 00209 00210 ML::Parse_Context * context; 00211 std::unique_ptr<ML::Parse_Context> ownedContext; 00212 00213 template<typename Fn> 00214 void forEachMember(const Fn & fn) 00215 { 00216 int memberNum = 0; 00217 00218 auto onMember = [&] (const char * memberName, 00219 ML::Parse_Context &) 00220 { 00221 // This structure takes care of pushing and popping our 00222 // path entry. It will make sure the member is always 00223 // popped no matter what 00224 struct PathPusher { 00225 PathPusher(const char * memberName, 00226 int memberNum, 00227 StreamingJsonParsingContext * context) 00228 : context(context) 00229 { 00230 context->pushPath(memberName, memberNum); 00231 } 00232 00233 ~PathPusher() 00234 { 00235 context->popPath(); 00236 } 00237 00238 StreamingJsonParsingContext * const context; 00239 } pusher(memberName, memberNum++, this); 00240 00241 fn(); 00242 }; 00243 00244 expectJsonObjectAscii(*context, onMember); 00245 } 00246 00247 virtual void forEachMember(const std::function<void ()> & fn) 00248 { 00249 return forEachMember<std::function<void ()> >(fn); 00250 } 00251 00252 template<typename Fn> 00253 void forEachElement(const Fn & fn) 00254 { 00255 bool first = true; 00256 00257 auto onElement = [&] (int index, ML::Parse_Context &) 00258 { 00259 if (first) 00260 pushPath(index); 00261 else replacePath(index); 00262 00263 fn(); 00264 00265 first = false; 00266 }; 00267 00268 expectJsonArray(*context, onElement); 00269 00270 if (!first) 00271 popPath(); 00272 } 00273 00274 virtual void forEachElement(const std::function<void ()> & fn) 00275 { 00276 return forEachElement<std::function<void ()> >(fn); 00277 } 00278 00279 void skip() 00280 { 00281 ML::expectJson(*context); 00282 } 00283 00284 virtual int expectInt() 00285 { 00286 return context->expect_int(); 00287 } 00288 00289 virtual float expectFloat() 00290 { 00291 return context->expect_float(); 00292 } 00293 00294 virtual double expectDouble() 00295 { 00296 return context->expect_double(); 00297 } 00298 00299 virtual bool expectBool() 00300 { 00301 return ML::expectJsonBool(*context); 00302 } 00303 00304 virtual void expectNull() 00305 { 00306 context->expect_literal("null"); 00307 } 00308 00309 virtual bool matchUnsignedLongLong(unsigned long long & val) 00310 { 00311 return context->match_unsigned_long_long(val); 00312 } 00313 00314 virtual bool matchLongLong(long long & val) 00315 { 00316 return context->match_long_long(val); 00317 } 00318 00319 virtual std::string expectStringAscii() 00320 { 00321 return expectJsonStringAscii(*context); 00322 } 00323 00324 virtual Utf8String expectStringUtf8(); 00325 00326 virtual bool isObject() const 00327 { 00328 char c = *(*context); 00329 return c == '{'; 00330 } 00331 00332 virtual bool isString() const 00333 { 00334 char c = *(*context); 00335 return c == '\"'; 00336 } 00337 00338 virtual bool isArray() const 00339 { 00340 char c = *(*context); 00341 return c == '['; 00342 } 00343 00344 virtual bool isBool() const 00345 { 00346 char c = *(*context); 00347 return c == 't' || c == 'f'; 00348 00349 } 00350 00351 virtual bool isNumber() const 00352 { 00353 ML::Parse_Context::Revert_Token token(*context); 00354 double d; 00355 if (context->match_double(d)) 00356 return true; 00357 return false; 00358 } 00359 00360 virtual bool isNull() const 00361 { 00362 ML::Parse_Context::Revert_Token token(*context); 00363 if (context->match_literal("null")) 00364 return true; 00365 return false; 00366 } 00367 00368 #if 0 00369 virtual bool isNumber() const 00370 { 00371 char c = *(*context); 00372 if (c >= '0' && c <= '9') 00373 return true; 00374 if (c == '.' || c == '+' || c == '-') 00375 return true; 00376 if (c == 'N' || c == 'I') // NaN or Inf 00377 return true; 00378 return false; 00379 } 00380 #endif 00381 00382 virtual void exception(const std::string & message) 00383 { 00384 context->exception(message); 00385 } 00386 00387 #if 0 00388 virtual bool isInt() const 00389 { 00390 Revert_Token token(*context); 00391 long long l; 00392 if (match_long_long(l)) 00393 return true; 00394 return false; 00395 } 00396 #endif 00397 00398 virtual Json::Value expectJson() 00399 { 00400 return ML::expectJson(*context); 00401 } 00402 }; 00403 00404 struct StructuredJsonParsingContext: public JsonParsingContext { 00405 00406 StructuredJsonParsingContext(const Json::Value & val) 00407 : current(&val) 00408 { 00409 } 00410 00411 const Json::Value * current; 00412 00413 virtual void exception(const std::string & message) 00414 { 00415 throw ML::Exception("At path " + printPath() + ": " + message); 00416 } 00417 00418 virtual int expectInt() 00419 { 00420 return current->asInt(); 00421 } 00422 00423 virtual float expectFloat() 00424 { 00425 return current->asDouble(); 00426 } 00427 00428 virtual double expectDouble() 00429 { 00430 return current->asDouble(); 00431 } 00432 00433 virtual bool expectBool() 00434 { 00435 return current->asBool(); 00436 } 00437 00438 virtual void expectNull() 00439 { 00440 if (!current->isNull()) 00441 exception("expected null value"); 00442 } 00443 00444 virtual bool matchUnsignedLongLong(unsigned long long & val) 00445 { 00446 if (current->isIntegral()) { 00447 val = current->asUInt(); 00448 return true; 00449 } 00450 return false; 00451 } 00452 00453 virtual bool matchLongLong(long long & val) 00454 { 00455 if (current->isIntegral()) { 00456 val = current->asInt(); 00457 return true; 00458 } 00459 return false; 00460 } 00461 00462 virtual std::string expectStringAscii() 00463 { 00464 return current->asString(); 00465 } 00466 00467 virtual Utf8String expectStringUtf8() 00468 { 00469 return Utf8String(current->asString()); 00470 } 00471 00472 virtual Json::Value expectJson() 00473 { 00474 return *current; 00475 } 00476 00477 virtual bool isObject() const 00478 { 00479 return current->type() == Json::objectValue; 00480 } 00481 00482 virtual bool isString() const 00483 { 00484 return current->type() == Json::stringValue; 00485 } 00486 00487 virtual bool isArray() const 00488 { 00489 return current->type() == Json::arrayValue; 00490 } 00491 00492 virtual bool isBool() const 00493 { 00494 return current->type() == Json::booleanValue; 00495 } 00496 00497 virtual bool isNumber() const 00498 { 00499 return current->isNumeric(); 00500 } 00501 00502 virtual bool isNull() const 00503 { 00504 return current->isNull(); 00505 } 00506 00507 virtual void skip() 00508 { 00509 } 00510 00511 virtual void forEachMember(const std::function<void ()> & fn) 00512 { 00513 if (!isObject()) 00514 exception("expected an object"); 00515 00516 const Json::Value * oldCurrent = current; 00517 int memberNum = 0; 00518 00519 for (auto it = current->begin(), end = current->end(); 00520 it != end; ++it) { 00521 00522 // This structure takes care of pushing and popping our 00523 // path entry. It will make sure the member is always 00524 // popped no matter what 00525 struct PathPusher { 00526 PathPusher(const std::string & memberName, 00527 int memberNum, 00528 StructuredJsonParsingContext * context) 00529 : context(context) 00530 { 00531 context->pushPath(memberName, memberNum); 00532 } 00533 00534 ~PathPusher() 00535 { 00536 context->popPath(); 00537 } 00538 00539 StructuredJsonParsingContext * const context; 00540 } pusher(it.memberName(), memberNum++, this); 00541 00542 current = &(*it); 00543 fn(); 00544 } 00545 00546 current = oldCurrent; 00547 } 00548 00549 virtual void forEachElement(const std::function<void ()> & fn) 00550 { 00551 if (!isArray()) 00552 exception("expected an array"); 00553 00554 const Json::Value * oldCurrent = current; 00555 00556 for (unsigned i = 0; i < oldCurrent->size(); ++i) { 00557 if (i == 0) 00558 pushPath(i); 00559 else replacePath(i); 00560 00561 current = &(*oldCurrent)[i]; 00562 00563 fn(); 00564 } 00565 00566 if (oldCurrent->size() != 0) 00567 popPath(); 00568 00569 current = oldCurrent; 00570 } 00571 }; 00572 00573 00574 /*****************************************************************************/ 00575 /* STRING JSON PARSING CONTEXT */ 00576 /*****************************************************************************/ 00577 00578 struct StringJsonParsingContext 00579 : public StreamingJsonParsingContext { 00580 00581 StringJsonParsingContext(std::string str_, 00582 const std::string & filename = "<<internal>>") 00583 : str(std::move(str_)) 00584 { 00585 init(filename, str.c_str(), str.c_str() + str.size()); 00586 } 00587 00588 std::string str; 00589 }; 00590 00591 00592 /*****************************************************************************/ 00593 /* UTILITIES */ 00594 /*****************************************************************************/ 00595 00596 template<typename Context> 00597 void parseJson(int * output, Context & context) 00598 { 00599 *output = context.expect_int(); 00600 } 00601 00602 template<typename Context> 00603 void parseJson(float * output, Context & context) 00604 { 00605 *output = context.expect_float(); 00606 } 00607 00608 template<typename Context> 00609 void parseJson(double * output, Context & context) 00610 { 00611 *output = context.expect_double(); 00612 } 00613 00614 template<typename Context> 00615 void parseJson(Id * output, Context & context) 00616 { 00617 using namespace std; 00618 00619 unsigned long long i; 00620 if (context.matchUnsignedLongLong(i)) { 00621 cerr << "got unsigned " << i << endl; 00622 *output = Id(i); 00623 return; 00624 } 00625 00626 signed long long l; 00627 if (context.matchLongLong(l)) { 00628 cerr << "got signed " << l << endl; 00629 *output = Id(l); 00630 return; 00631 } 00632 00633 std::string s = context.expectStringAscii(); 00634 *output = Id(s); 00635 } 00636 00637 template<typename Context, typename T> 00638 void parseJson(std::vector<T> * output, Context & context) 00639 { 00640 throw ML::Exception("vector not done"); 00641 } 00642 00643 template<typename Context> 00644 void parseJson(Json::Value * output, Context & context) 00645 { 00646 *output = context.expectJson(); 00647 } 00648 00649 } // namespace Datacratic