RTBKit  0.9
Open-source framework to create real-time ad bidding systems.
soa/types/testing/string_test.cc
00001 /* string_test.cc
00002    Copyright (c) 2012 Datacratic.  All rights reserved.
00003 */
00004 
00005 #define BOOST_TEST_MAIN
00006 #define BOOST_TEST_DYN_LINK
00007 #include "soa/types/string.h"
00008 #include <boost/test/unit_test.hpp>
00009 #include <boost/regex/icu.hpp>
00010 #include <boost/regex.hpp>
00011 #include "soa/jsoncpp/json.h"
00012 #include "jml/arch/format.h"
00013 
00014 using namespace std;
00015 using namespace ML;
00016 using namespace Datacratic;
00017 
00018 
00019 BOOST_AUTO_TEST_CASE( test_print_format )
00020 {
00021     std::string raw = "saint-jérôme";
00022     // Test 1 - Iterate through the raw string with normal iterators we should not find 'é'
00023     unsigned numAccentedChars = 0;
00024     for(string::const_iterator it = raw.begin() ; it != raw.end(); ++it)
00025     {
00026         if (*it ==  L'é' || *it ==  L'ô')
00027             numAccentedChars++;
00028     }
00029     BOOST_CHECK_EQUAL(numAccentedChars, 0);
00030     Utf8String utf8(raw);
00031     // Now iterate through the utf8 string
00032     for (Utf8String::const_iterator it = utf8.begin(); it != utf8.end(); ++it)
00033     {
00034         if (*it ==  L'é' || *it ==  L'ô')
00035             numAccentedChars++;
00036     }
00037     BOOST_CHECK_EQUAL(numAccentedChars, 2);
00038     // Now add another string to it
00039     std::string raw2 = "saint-jérôme2";
00040     utf8+=raw2;
00041     numAccentedChars=0;
00042     // Now iterate through the utf8 string
00043     for (Utf8String::const_iterator it = utf8.begin(); it != utf8.end(); ++it)
00044     {
00045         if (*it ==  L'é' || *it ==  L'ô')
00046             numAccentedChars++;
00047     }
00048     BOOST_CHECK_EQUAL(numAccentedChars, 4);
00049     string theString(utf8.rawData(), utf8.rawLength());
00050     size_t found = raw.find(L'é') ;
00051     BOOST_CHECK_EQUAL(found, string::npos);
00052     // We do a normal regex first
00053     boost::regex reg("é");
00054     std::string raw4 = "saint-jérôme";
00055     BOOST_CHECK_EQUAL( boost::regex_search(raw4, reg), true);
00056     // Please see Saint-j\xC3A9r\xC3B4me for UTF-8 character table
00057     boost::u32regex withHex = boost::make_u32regex("saint-j\xc3\xa9r\xc3\xb4me");
00058     boost::u32regex withoutHex = boost::make_u32regex(L"[a-z]*-jérôme");
00059     boost::match_results<std::string::const_iterator> matches;
00060     BOOST_CHECK_EQUAL(boost::u32regex_search(raw, matches, withoutHex), true);
00061     if (boost::u32regex_search(raw, matches, withoutHex))
00062     {
00063         for (boost::match_results< std::string::const_iterator >::const_iterator i = matches.begin(); i != matches.end(); ++i)
00064         {
00065                 if (i->matched) std::cout << "matches :       [" << i->str() << "]\n";
00066                 else            std::cout << "doesn't match : [" << i->str() << "]\n";
00067         }
00068     }
00069     else
00070     {
00071         cerr << "did not get a match without hex" << endl;
00072     }
00073     BOOST_CHECK_EQUAL(boost::u32regex_search(raw, matches, withHex), true);
00074 }
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator