RTBKit
0.9
Open-source framework to create real-time ad bidding systems.
|
00001 // Copyright 2006 Nemanja Trifunovic 00002 00003 /* 00004 Permission is hereby granted, free of charge, to any person or organization 00005 obtaining a copy of the software and accompanying documentation covered by 00006 this license (the "Software") to use, reproduce, display, distribute, 00007 execute, and transmit the Software, and to prepare derivative works of the 00008 Software, and to permit third-parties to whom the Software is furnished to 00009 do so, all subject to the following: 00010 00011 The copyright notices in the Software and this entire statement, including 00012 the above license grant, this restriction and the following disclaimer, 00013 must be included in all copies of the Software, in whole or in part, and 00014 all derivative works of the Software, unless such copies or derivative 00015 works are solely in the form of machine-executable object code generated by 00016 a source language processor. 00017 00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00020 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 00021 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 00022 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00024 DEALINGS IN THE SOFTWARE. 00025 */ 00026 00027 00028 #ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 00029 #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 00030 00031 #include "core.h" 00032 00033 namespace utf8 00034 { 00035 namespace unchecked 00036 { 00037 template <typename octet_iterator> 00038 octet_iterator append(uint32_t cp, octet_iterator result) 00039 { 00040 if (cp < 0x80) // one octet 00041 *(result++) = static_cast<uint8_t>(cp); 00042 else if (cp < 0x800) { // two octets 00043 *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0); 00044 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); 00045 } 00046 else if (cp < 0x10000) { // three octets 00047 *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0); 00048 *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80); 00049 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); 00050 } 00051 else { // four octets 00052 *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0); 00053 *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80); 00054 *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80); 00055 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); 00056 } 00057 return result; 00058 } 00059 00060 template <typename octet_iterator> 00061 uint32_t next(octet_iterator& it) 00062 { 00063 uint32_t cp = internal::mask8(*it); 00064 int length = utf8::internal::sequence_length(it); 00065 switch (length) { 00066 case 1: 00067 break; 00068 case 2: 00069 it++; 00070 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); 00071 break; 00072 case 3: 00073 ++it; 00074 cp = ((cp << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff); 00075 ++it; 00076 cp += (*it) & 0x3f; 00077 break; 00078 case 4: 00079 ++it; 00080 cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff); 00081 ++it; 00082 cp += (internal::mask8(*it) << 6) & 0xfff; 00083 ++it; 00084 cp += (*it) & 0x3f; 00085 break; 00086 } 00087 ++it; 00088 return cp; 00089 } 00090 00091 template <typename octet_iterator> 00092 uint32_t peek_next(octet_iterator it) 00093 { 00094 return next(it); 00095 } 00096 00097 template <typename octet_iterator> 00098 uint32_t prior(octet_iterator& it) 00099 { 00100 while (internal::is_trail(*(--it))) ; 00101 octet_iterator temp = it; 00102 return next(temp); 00103 } 00104 00105 // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) 00106 template <typename octet_iterator> 00107 inline uint32_t previous(octet_iterator& it) 00108 { 00109 return prior(it); 00110 } 00111 00112 template <typename octet_iterator, typename distance_type> 00113 void advance (octet_iterator& it, distance_type n) 00114 { 00115 for (distance_type i = 0; i < n; ++i) 00116 next(it); 00117 } 00118 00119 template <typename octet_iterator> 00120 typename std::iterator_traits<octet_iterator>::difference_type 00121 distance (octet_iterator first, octet_iterator last) 00122 { 00123 typename std::iterator_traits<octet_iterator>::difference_type dist; 00124 for (dist = 0; first < last; ++dist) 00125 next(first); 00126 return dist; 00127 } 00128 00129 template <typename u16bit_iterator, typename octet_iterator> 00130 octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) 00131 { 00132 while (start != end) { 00133 uint32_t cp = internal::mask16(*start++); 00134 // Take care of surrogate pairs first 00135 if (internal::is_lead_surrogate(cp)) { 00136 uint32_t trail_surrogate = internal::mask16(*start++); 00137 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; 00138 } 00139 result = append(cp, result); 00140 } 00141 return result; 00142 } 00143 00144 template <typename u16bit_iterator, typename octet_iterator> 00145 u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) 00146 { 00147 while (start < end) { 00148 uint32_t cp = next(start); 00149 if (cp > 0xffff) { //make a surrogate pair 00150 *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET); 00151 *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); 00152 } 00153 else 00154 *result++ = static_cast<uint16_t>(cp); 00155 } 00156 return result; 00157 } 00158 00159 template <typename octet_iterator, typename u32bit_iterator> 00160 octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) 00161 { 00162 while (start != end) 00163 result = append(*(start++), result); 00164 00165 return result; 00166 } 00167 00168 template <typename octet_iterator, typename u32bit_iterator> 00169 u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) 00170 { 00171 while (start < end) 00172 (*result++) = next(start); 00173 00174 return result; 00175 } 00176 00177 // The iterator class 00178 template <typename octet_iterator> 00179 class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> { 00180 octet_iterator it; 00181 public: 00182 iterator () {}; 00183 explicit iterator (const octet_iterator& octet_it): it(octet_it) {} 00184 // the default "big three" are OK 00185 octet_iterator base () const { return it; } 00186 uint32_t operator * () const 00187 { 00188 octet_iterator temp = it; 00189 return next(temp); 00190 } 00191 bool operator == (const iterator& rhs) const 00192 { 00193 return (it == rhs.it); 00194 } 00195 bool operator != (const iterator& rhs) const 00196 { 00197 return !(operator == (rhs)); 00198 } 00199 iterator& operator ++ () 00200 { 00201 std::advance(it, internal::sequence_length(it)); 00202 return *this; 00203 } 00204 iterator operator ++ (int) 00205 { 00206 iterator temp = *this; 00207 std::advance(it, internal::sequence_length(it)); 00208 return temp; 00209 } 00210 iterator& operator -- () 00211 { 00212 prior(it); 00213 return *this; 00214 } 00215 iterator operator -- (int) 00216 { 00217 iterator temp = *this; 00218 prior(it); 00219 return temp; 00220 } 00221 }; // class iterator 00222 00223 } // namespace utf8::unchecked 00224 } // namespace utf8 00225 00226 00227 #endif // header guard 00228