![]() |
RTBKit
0.9
Open-source framework to create real-time ad bidding systems.
|
00001 /* string.cc 00002 Sunil Rottoo, 27 April 2012 00003 Copyright (c) 2012 Datacratic. All rights reserved. 00004 00005 */ 00006 00007 #include "string.h" 00008 #include "soa/js/js_value.h" 00009 #include "soa/jsoncpp/json.h" 00010 #include <iostream> 00011 #include "jml/arch/exception.h" 00012 #include "jml/db/persistent.h" 00013 00014 using namespace std; 00015 00016 00017 namespace Datacratic { 00018 00019 00020 /*****************************************************************************/ 00021 /* UTF8STRING */ 00022 /****************************************************************************/ 00023 00024 Utf8String 00025 Utf8String::fromLatin1(const std::string & lat1Str) 00026 { 00027 size_t bufferSize = lat1Str.size(); 00028 const char *inBuf = lat1Str.c_str(); 00029 string utf8Str(bufferSize * 4, '.'); 00030 00031 auto iter = utf8Str.begin(); 00032 auto start = iter; 00033 for (size_t i = 0; i < bufferSize; i++) { 00034 uint32_t cp(inBuf[i] & 0xff); 00035 iter = utf8::append(cp, iter); 00036 } 00037 utf8Str.resize(iter-start); 00038 00039 return Utf8String(utf8Str); 00040 } 00041 00042 Utf8String::Utf8String(const string & in, bool check) 00043 : data_(in) 00044 { 00045 if (check) 00046 { 00047 // Check if we find an invalid encoding 00048 string::const_iterator end_it = utf8::find_invalid(in.begin(), in.end()); 00049 if (end_it != in.end()) 00050 { 00051 throw ML::Exception("Invalid sequence within utf-8 string"); 00052 } 00053 } 00054 } 00055 00056 Utf8String::Utf8String(string && in, bool check) 00057 : data_(std::move(in)) 00058 { 00059 if (check) 00060 { 00061 // Check if we find an invalid encoding 00062 string::const_iterator end_it = utf8::find_invalid(data_.begin(), data_.end()); 00063 if (end_it != data_.end()) 00064 { 00065 throw ML::Exception("Invalid sequence within utf-8 string"); 00066 } 00067 } 00068 } 00069 00070 Utf8String::const_iterator 00071 Utf8String::begin() const 00072 { 00073 return Utf8String::const_iterator(data_.begin(), data_.begin(), data_.end()) ; 00074 } 00075 00076 Utf8String::const_iterator 00077 Utf8String::end() const 00078 { 00079 return Utf8String::const_iterator(data_.end(), data_.begin(), data_.end()) ; 00080 } 00081 00082 Utf8String &Utf8String::operator+=(const Utf8String &utf8str) 00083 { 00084 data_ += utf8str.data_; 00085 return *this; 00086 } 00087 00088 std::ostream & operator << (std::ostream & stream, const Utf8String & str) 00089 { 00090 stream << string(str.rawData(), str.rawLength()) ; 00091 return stream; 00092 } 00093 00094 void 00095 Utf8String:: 00096 serialize(ML::DB::Store_Writer & store) const 00097 { 00098 store << data_; 00099 } 00100 00101 void 00102 Utf8String:: 00103 reconstitute(ML::DB::Store_Reader & store) 00104 { 00105 store >> data_; 00106 } 00107 00108 string Utf8String::extractAscii() 00109 { 00110 string s; 00111 for(auto it = begin(); it != end(); it++) { 00112 char c = *it; 00113 if (c >= ' ' && c < 127) { 00114 s += c; 00115 } else { 00116 s += '?'; 00117 } 00118 } 00119 return s; 00120 } 00121 00122 } // namespace Datacratic