RTBKit  0.9
Open-source framework to create real-time ad bidding systems.
soa/types/string.cc
00001 /* string.cc
00002    Sunil Rottoo, 27 April 2012
00003    Copyright (c) 2012 Datacratic.  All rights reserved.
00004 
00005 */
00006 
00007 #include "string.h"
00008 #include "soa/js/js_value.h"
00009 #include "soa/jsoncpp/json.h"
00010 #include <iostream>
00011 #include "jml/arch/exception.h"
00012 #include "jml/db/persistent.h"
00013 
00014 using namespace std;
00015 
00016 
00017 namespace Datacratic {
00018 
00019 
00020 /*****************************************************************************/
00021 /* UTF8STRING                                                                */
00022 /****************************************************************************/
00023 
00024 Utf8String
00025 Utf8String::fromLatin1(const std::string & lat1Str)
00026 {
00027     size_t bufferSize = lat1Str.size();
00028     const char *inBuf = lat1Str.c_str();
00029     string utf8Str(bufferSize * 4, '.');
00030 
00031     auto iter = utf8Str.begin();
00032     auto start = iter;
00033     for (size_t i = 0; i < bufferSize; i++) {
00034         uint32_t cp(inBuf[i] & 0xff);
00035         iter = utf8::append(cp, iter);
00036     }
00037     utf8Str.resize(iter-start);
00038 
00039     return Utf8String(utf8Str);
00040 }
00041 
00042 Utf8String::Utf8String(const string & in, bool check)
00043     : data_(in)
00044 {
00045     if (check)
00046     {
00047         // Check if we find an invalid encoding
00048         string::const_iterator end_it = utf8::find_invalid(in.begin(), in.end());
00049         if (end_it != in.end())
00050         {
00051             throw ML::Exception("Invalid sequence within utf-8 string");
00052         }
00053     }
00054 }
00055 
00056 Utf8String::Utf8String(string && in, bool check)
00057     : data_(std::move(in))
00058 {
00059     if (check)
00060     {
00061         // Check if we find an invalid encoding
00062         string::const_iterator end_it = utf8::find_invalid(data_.begin(), data_.end());
00063         if (end_it != data_.end())
00064         {
00065             throw ML::Exception("Invalid sequence within utf-8 string");
00066         }
00067     }
00068 }
00069 
00070 Utf8String::const_iterator
00071 Utf8String::begin() const
00072 {
00073     return Utf8String::const_iterator(data_.begin(), data_.begin(), data_.end()) ;
00074 }
00075 
00076 Utf8String::const_iterator
00077 Utf8String::end() const
00078 {
00079     return Utf8String::const_iterator(data_.end(), data_.begin(), data_.end()) ;
00080 }
00081 
00082 Utf8String &Utf8String::operator+=(const Utf8String &utf8str)
00083 {
00084     data_ += utf8str.data_;
00085     return *this;
00086 }
00087 
00088 std::ostream & operator << (std::ostream & stream, const Utf8String & str)
00089 {
00090     stream << string(str.rawData(), str.rawLength()) ;
00091     return stream;
00092 }
00093 
00094 void
00095 Utf8String::
00096 serialize(ML::DB::Store_Writer & store) const
00097 {
00098     store << data_;
00099 }
00100 
00101 void
00102 Utf8String::
00103 reconstitute(ML::DB::Store_Reader & store)
00104 {
00105     store >> data_;
00106 }
00107     
00108 string Utf8String::extractAscii()
00109 {
00110     string s;
00111     for(auto it = begin(); it != end(); it++) {
00112         char c = *it;
00113         if (c >= ' ' && c < 127) {
00114             s += c;
00115         } else {
00116             s += '?';
00117         }
00118     }
00119     return s;
00120 }
00121 
00122 } // namespace Datacratic
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator