RTBKit
0.9
Open-source framework to create real-time ad bidding systems.
|
00001 /* http_header.cc 00002 Jeremy Barnes, 18 February 2010 00003 Copyright (c) 2010 Datacratic. All rights reserved. 00004 00005 */ 00006 00007 #include "http_header.h" 00008 #include "jml/utils/parse_context.h" 00009 #include "jml/utils/string_functions.h" 00010 #include "jml/db/persistent.h" 00011 #include "jml/utils/vector_utils.h" 00012 00013 using namespace std; 00014 using namespace ML; 00015 00016 00017 namespace Datacratic { 00018 00019 00020 /*****************************************************************************/ 00021 /* REST PARAMS */ 00022 /*****************************************************************************/ 00023 00024 std::string 00025 RestParams:: 00026 uriEscaped() const 00027 { 00028 auto urlEscape = [] (const std::string & str) 00029 { 00030 string result; 00031 for (char c: str) { 00032 if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') 00033 result += c; 00034 else result += ML::format("%%%02x", c); 00035 } 00036 return result; 00037 }; 00038 00039 std::string uri; 00040 00041 for (unsigned i = 0; i < size(); ++i) { 00042 if (i == 0) 00043 uri += "?"; 00044 else uri += "&"; 00045 uri += urlEscape((*this)[i].first) 00046 + "=" + urlEscape((*this)[i].second); 00047 } 00048 00049 return uri; 00050 } 00051 00052 bool 00053 RestParams:: 00054 hasValue(const std::string & key) const 00055 { 00056 for (auto & kv: *this) 00057 if (kv.first == key) 00058 return true; 00059 return false; 00060 } 00061 00062 std::string 00063 RestParams:: 00064 getValue(const std::string & key) const 00065 { 00066 for (auto & kv: *this) 00067 if (kv.first == key) 00068 return kv.second; 00069 throw ML::Exception("key " + key + " not found in RestParams"); 00070 } 00071 00072 RestParams 00073 RestParams:: 00074 fromBinary(const std::string & binary) 00075 { 00076 using namespace ML::DB; 00077 00078 istringstream stream(binary); 00079 ML::DB::Store_Reader store(stream); 00080 unsigned char version; 00081 store >> version; 00082 if (version != 0) 00083 throw ML::Exception("invalid RestParams version"); 00084 00085 std::vector<std::pair<std::string, std::string> > v; 00086 store >> v; 00087 RestParams result; 00088 result.swap(v); 00089 return result; 00090 } 00091 00092 std::string 00093 RestParams:: 00094 toBinary() const 00095 { 00096 using namespace ML::DB; 00097 00098 ostringstream stream; 00099 ML::DB::Store_Writer writer(stream); 00100 unsigned char version = 0; 00101 writer << version 00102 << static_cast<const std::vector<std::pair<std::string, std::string> > &>(*this); 00103 return stream.str(); 00104 } 00105 00106 00107 /*****************************************************************************/ 00108 /* HTTP HEADER */ 00109 /*****************************************************************************/ 00110 00111 void 00112 HttpHeader:: 00113 swap(HttpHeader & other) 00114 { 00115 verb.swap(other.verb); 00116 resource.swap(other.resource); 00117 contentType.swap(other.contentType); 00118 std::swap(contentLength, other.contentLength); 00119 headers.swap(other.headers); 00120 knownData.swap(other.knownData); 00121 std::swap(isChunked, other.isChunked); 00122 std::swap(version, other.version); 00123 } 00124 00125 namespace { 00126 00127 std::string 00128 expectUrlEncodedString(ML::Parse_Context & context, 00129 string delimiters) 00130 { 00131 string result; 00132 while (context) { 00133 char c = *context; 00134 for (unsigned i = 0; i < delimiters.length(); ++i) 00135 if (c == delimiters[i]) 00136 return result; 00137 00138 ++context; 00139 00140 if (c == '%') { 00141 #if 0 00142 auto hexToInt = [&] (int c) 00143 { 00144 if (isdigit(c)) 00145 return c - '0'; 00146 if (isalpha(c)) { 00147 c = tolower(c); 00148 if (c >= 'a' && c <= 'f') 00149 return c + 10 - 'a'; 00150 context.exception("invalid hex character"); 00151 } 00152 }; 00153 #endif 00154 00155 char s[3] = { *context++, *context++, 0 }; 00156 char * endptr; 00157 int code = strtol(s, &endptr, 16); 00158 if (endptr != s + 2) { 00159 cerr << "s = " << (void *)s << endl; 00160 cerr << "endptr = " << (void *)endptr << endl; 00161 context.exception("invalid url encoded character: " + string(s)); 00162 } 00163 result += code; 00164 } 00165 else { 00166 result += c; 00167 } 00168 } 00169 00170 return result; 00171 } 00172 00173 } // file scope 00174 00175 void 00176 HttpHeader:: 00177 parse(const std::string & headerAndData) 00178 { 00179 try { 00180 HttpHeader parsed; 00181 00182 // Parse http 00183 ML::Parse_Context context("request header", 00184 headerAndData.c_str(), 00185 headerAndData.c_str() 00186 + headerAndData.length()); 00187 00188 parsed.verb = context.expect_text(" \n"); 00189 context.expect_literal(' '); 00190 parsed.resource = context.expect_text(" ?"); 00191 if (context.match_literal('?')) { 00192 while (!context.match_literal(' ')) { 00193 string key = expectUrlEncodedString(context, "=& "); 00194 if (context.match_literal(' ')) { 00195 queryParams.push_back(make_pair(key, "")); 00196 break; 00197 } 00198 if (context.match_literal('=')) { 00199 string value = expectUrlEncodedString(context, "& "); 00200 queryParams.push_back(make_pair(key, value)); 00201 } 00202 if (!context.match_literal('&')) 00203 break; 00204 } 00205 } 00206 context.expect_literal(' '); 00207 parsed.version = context.expect_text('\r'); 00208 context.expect_eol(); 00209 00210 while (!context.match_literal("\r\n")) { 00211 string name = lowercase(context.expect_text("\r\n:")); 00212 //cerr << "name = " << name << endl; 00213 context.expect_literal(':'); 00214 context.match_whitespace(); 00215 if (name == "content-length") { 00216 parsed.contentLength = context.expect_int(); 00217 //cerr << "******* set cntentLength " << parsed.contentLength 00218 // << endl; 00219 } 00220 else if (name == "content-type") 00221 parsed.contentType = context.expect_text('\r'); 00222 else if (name == "transfer-encoding") { 00223 string transferEncoding = lowercase(context.expect_text('\r')); 00224 00225 if (transferEncoding != "chunked") 00226 throw ML::Exception("unknown transfer-encoding"); 00227 parsed.isChunked = true; 00228 } 00229 else { 00230 string value = context.expect_text('\r'); 00231 parsed.headers[name] = value; 00232 } 00233 context.expect_eol(); 00234 } 00235 00236 // The rest of the data is the body 00237 const char * content_start 00238 = headerAndData.c_str() + context.get_offset(); 00239 00240 parsed.knownData 00241 = string(content_start, 00242 headerAndData.c_str() + headerAndData.length()); 00243 00244 if ((parsed.contentLength != -1) 00245 && ((int)parsed.knownData.length() > (int)parsed.contentLength)) { 00246 cerr << "got double packet: got content length " << parsed.knownData.length() 00247 << " wanted " << parsed.contentLength << endl; 00248 #if 1 00249 context.exception(format("too much data for content length: " 00250 "%d > %d for data \"%s\"", 00251 (int)parsed.knownData.length(), 00252 (int)parsed.contentLength, 00253 headerAndData.c_str())); 00254 #endif 00255 parsed.knownData.resize(parsed.contentLength); 00256 } 00257 00258 swap(parsed); 00259 } 00260 catch (const std::exception & exc) { 00261 cerr << "error parsing http header: " << exc.what() << endl; 00262 cerr << headerAndData << endl; 00263 throw; 00264 } 00265 } 00266 00267 std::ostream & operator << (std::ostream & stream, const HttpHeader & header) 00268 { 00269 stream << header.verb << " " << header.resource 00270 << header.queryParams.uriEscaped(); 00271 stream << "\r\n" 00272 << "Content-Type: " << header.contentType << "\r\n"; 00273 if (header.isChunked) 00274 stream << "Transfer-Encoding: chunked\r\n"; 00275 else if (header.contentLength != -1) 00276 stream << "Content-Length: " << header.contentLength << "\r\n"; 00277 for (auto it = header.headers.begin(), end = header.headers.end(); 00278 it != end; ++it) { 00279 stream << it->first << ": " << it->second << "\r\n"; 00280 } 00281 stream << "\r\n"; 00282 return stream; 00283 } 00284 00285 std::string getResponseReasonPhrase(int code) 00286 { 00287 switch (code) { 00288 case 100: return "Continue"; 00289 case 101: return "Switching Protocols"; 00290 case 200: return "OK"; 00291 case 201: return "Created"; 00292 case 202: return "Accepted"; 00293 case 203: return "Non-Authoritative Information"; 00294 case 204: return "No Content"; 00295 case 205: return "Reset Content"; 00296 case 206: return "Partial Content"; 00297 case 300: return "Multiple Choices"; 00298 case 301: return "Moved Permanently"; 00299 case 302: return "Found"; 00300 case 303: return "See Other"; 00301 case 304: return "Not Modified"; 00302 case 305: return "Use Proxy"; 00303 case 307: return "Temporary Redirect"; 00304 case 400: return "Bad Request"; 00305 case 401: return "Unauthorized"; 00306 case 402: return "Payment Required"; 00307 case 403: return "Forbidden"; 00308 case 404: return "Not Found"; 00309 case 405: return "Method Not Allowed"; 00310 case 406: return "Not Acceptable"; 00311 case 407: return "Proxy Authentication Required"; 00312 case 408: return "Request Time-out"; 00313 case 409: return "Conflict"; 00314 case 410: return "Gone"; 00315 case 411: return "Length Required"; 00316 case 412: return "Precondition Failed"; 00317 case 413: return "Request Entity Too Large"; 00318 case 414: return "Request-URI Too Large"; 00319 case 415: return "Unsupported Media Type"; 00320 case 416: return "Requested range not satisfiable"; 00321 case 417: return "Expectation Failed"; 00322 case 500: return "Internal Server Error"; 00323 case 501: return "Not Implemented"; 00324 case 502: return "Bad Gateway"; 00325 case 503: return "Service Unavailable"; 00326 case 504: return "Gateway Time-out"; 00327 case 505: return "HTTP Version not supported"; 00328 default: 00329 return ML::format("unknown response code %d", code); 00330 } 00331 } 00332 00333 } // namespace Datacratic