The Battle for Wesnoth  1.13.4+dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
string_utils.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 by David White <[email protected]>
3  Copyright (C) 2005 - 2016 by Guillaume Melquiond <[email protected]>
4  Part of the Battle for Wesnoth Project http://www.wesnoth.org/
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #ifndef SERIALIZATION_STRING_UTILS_HPP_INCLUDED
17 #define SERIALIZATION_STRING_UTILS_HPP_INCLUDED
18 
19 #include <algorithm>
20 #include <map>
21 #include <set>
22 #include <sstream>
23 #include <string>
24 #include <vector>
25 #include <boost/next_prior.hpp>
26 
27 class t_string;
28 
29 namespace utils {
30 
// Named Unicode symbol strings. They are only declared here (extern), so the
// definitions are provided by another translation unit.
extern const std::string unicode_minus;
extern const std::string unicode_en_dash;
extern const std::string unicode_em_dash;
extern const std::string unicode_figure_dash;
extern const std::string unicode_multiplication_sign;
extern const std::string unicode_bullet;
37 
38 bool isnewline(const char c);
39 bool portable_isspace(const char c);
40 bool notspace(char c);
41 
/** Bit flags accepted by the split functions through their 'flags' parameter. */
enum {
	/** REMOVE_EMPTY : remove empty elements. */
	REMOVE_EMPTY = 1 << 0,
	/** STRIP_SPACES : strips leading and trailing blank spaces. */
	STRIP_SPACES = 1 << 1
};
45 
46 /// Splits a (comma-)separated string into a vector of pieces.
47 std::vector< std::string > split(std::string const &val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES);
48 /// Splits a (comma-)separated string into a set of pieces.
49 /// See split() for the meanings of the parameters.
50 inline std::set< std::string > set_split(std::string const &val, const char c = ',', const int flags = REMOVE_EMPTY | STRIP_SPACES)
51 {
52  std::vector< std::string > vec_split = split(val, c, flags);
53  return std::set< std::string >(vec_split.begin(), vec_split.end());
54 }
55 
56 /**
57  * Splits a string based on two separators into a map.
58  * major: the separator between elements of the map
59  * minor: the separator between keys and values in one element
60  *
61  * For example, the string 'a:b,c:d,e:f' would be parsed into:
62  * a => b
63  * c => d
64  * e => f
65 */
66 std::map< std::string, std::string > map_split(
67  std::string const &val
68  , char major = ','
69  , char minor = ':'
71  , std::string const& default_value = "");
72 
73 /**
74  * Splits a string based either on a separator where text within parenthesis
75  * is protected from splitting (Note that one can use the same character for
76  * both the left and right parenthesis. In this mode it usually makes only
77  * sense to have one character for the left and right parenthesis.)
78  * or if the separator == 0 it splits a string into an odd number of parts:
79  * - The part before the first '(',
80  * - the part between the first '('
81  * - and the matching right ')', etc ...
82  * and the remainder of the string.
83  * Note that this will find the first matching char in the left string
84  * and match against the corresponding char in the right string.
85  * In this mode, a correctly processed string should return
86  * an odd number of elements in the vector, and
87  * empty elements are never removed as they are placeholders;
88  * hence REMOVE_EMPTY only works for the separator split.
89  *
90  * parenthetical_split("a(b)c{d}e(f{g})h",0,"({",")}") should return
91  * a vector of <"a","b","c","d","e","f{g}","h">
92  */
93 std::vector< std::string > parenthetical_split(std::string const &val,
94  const char separator = 0 , std::string const &left="(",
95  std::string const &right=")",const int flags = REMOVE_EMPTY | STRIP_SPACES);
96 
97 /**
98  * Similar to parenthetical_split, but also expands embedded square brackets.
99  * Separator must be specified and number of entries in each square bracket
100  * must match in each section.
101  * Leading zeros are preserved if specified between square brackets.
102  * An asterisk as in [a*n] indicates to expand 'a' n times
103  *
104  * This is useful to expand animation WML code.
105  * Examples:
106  * square_parenthetical_split("a[1-3](1,[5,6,7]),b[8,9]",',') should return
107  * <"a1(1,5)","a2(1,6)","a3(1,7)","b8","b9">
108  * square_parenthetical_split("abc[07-10]") should return
109  * <"abc07","abc08","abc09","abc10">
110  * square_parenthetical_split("a[1,2]b[3-4]:c[5,6]") should return
111  * <"a1b3:c5","a2b4:c6">
112  * square_parenthetical_split("abc[3-1].png") should return
113  * <"abc3.png","abc2.png","abc1.png">
114  * square_parenthetical_split("abc[de,xyz]") should return
115  * <"abcde","abcxyz">
116  * square_parenthetical_split("abc[1*3]") should return
117  * <"abc1","abc1","abc1">
118  */
119 std::vector< std::string > square_parenthetical_split(std::string const &val,
120  const char separator = ',' , std::string const &left="([",
121  std::string const &right=")]",const int flags = REMOVE_EMPTY | STRIP_SPACES);
122 
/**
 * Generates a new string joining container items in a list.
 *
 * Every element is written with operator<< and consecutive elements are
 * separated by @a s. An empty container yields an empty string.
 *
 * @param v A container with elements.
 * @param s List delimiter.
 *
 * @return The joined string.
 */
template <typename T>
std::string join(T const &v, const std::string& s = ",")
{
	std::ostringstream str;
	for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
		// Write the delimiter in front of every item except the first one.
		// This gives the same output as looking ahead for the last item, but
		// needs neither an iterator copy nor a Boost helper.
		if(i != v.begin()) {
			str << s;
		}
		str << *i;
	}

	return str.str();
}
141 
/**
 * Generates a new string joining the key/value pairs of a map-like container.
 *
 * Every element is written as its @c first member, @a minor, then its
 * @c second member; consecutive elements are separated by @a major.
 * An empty container yields an empty string.
 *
 * @param v     A container whose elements expose @c first and @c second.
 * @param major Delimiter between elements.
 * @param minor Delimiter between the key and the value of one element.
 *
 * @return The joined string.
 */
template <typename T>
std::string join_map(
	  const T& v
	, const std::string& major = ","
	, const std::string& minor = ":")
{
	std::ostringstream str;
	for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
		// Write the element delimiter in front of every pair but the first.
		if(i != v.begin()) {
			str << major;
		}
		str << i->first << minor << i->second;
	}

	return str.str();
}
157 
158 /**
159  * Generates a new string containing a bullet list.
160  *
161  * List items are preceded by the indentation blanks, a bullet string and
162  * another blank; all but the last item are followed by a newline.
163  *
164  * @param v A container with elements.
165  * @param indent Number of indentation blanks.
166  * @param bullet The leading bullet string.
167  */
168 template<typename T>
169 std::string bullet_list(const T& v, size_t indent = 4, const std::string& bullet = unicode_bullet)
170 {
171  std::ostringstream str;
172 
173  for(typename T::const_iterator i = v.begin(); i != v.end(); ++i) {
174  if(i != v.begin()) {
175  str << '\n';
176  }
177 
178  str << std::string(indent, ' ') << bullet << ' ' << *i;
179  }
180 
181  return str.str();
182 }
183 
184 /**
185  * Indent a block of text.
186  *
187  * Only lines with content are changed; empty lines are left intact. However,
188  * if @a string is an empty string itself, the indentation unit with the
189  * specified @a indent_size will be returned instead.
190  *
191  * @param string Text to indent.
192  * @param indent_size Number of indentation units to use.
193  */
194 std::string indent(const std::string& string, size_t indent_size = 4);
195 
196 /**
197  * This function is identical to split(), except it does not split
198  * when it otherwise would if the previous character was identical to the parameter 'quote'.
199  * i.e. it does not split quoted commas.
200  * This method was added to make it possible to quote user input,
201  * particularly so commas in user input will not cause visual problems in menus.
202  *
203  * @todo Why not change split()? That would change the method's postcondition.
204  */
205 std::vector< std::string > quoted_split(std::string const &val, char c= ',',
206  int flags = REMOVE_EMPTY | STRIP_SPACES, char quote = '\\');
207 std::pair< int, int > parse_range(std::string const &str);
208 std::vector< std::pair< int, int > > parse_ranges(std::string const &str);
209 int apply_modifier( const int number, const std::string &amount, const int minimum = 0);
210 
211 /* add a "+" or replace the "-" par Unicode minus */
213 { return mod[0] == '-' ?
214  (unicode_minus + std::string(mod.begin()+1, mod.end())) : ("+" + mod);}
215 
216 /** Prepends a configurable set of characters with a backslash */
217 std::string escape(const std::string &str, const char *special_chars);
218 
219 /**
220  * Prepend all special characters with a backslash.
221  *
222  * Special characters are:
223  * #@{}+-,\*=
224  */
225 inline std::string escape(const std::string &str)
226 { return escape(str, "#@{}+-,\\*="); }
227 
228 /** Remove all escape characters (backslash) */
229 std::string unescape(const std::string &str);
230 
231 /** Percent-escape characters in a UTF-8 string intended to be part of a URL. */
232 std::string urlencode(const std::string &str);
233 
/** Replace all instances of src in str with dst */
std::string replace(std::string str, const std::string &src, const std::string &dst);

/**
 * Remove whitespace from the front and back of the string 'str'.
 *
 * NOTE(review): takes 'str' by non-const reference and returns a reference,
 * presumably to 'str' itself -- confirm against the implementation.
 */
std::string &strip(std::string &str);

/**
 * Remove whitespace from the back of the string 'str'.
 *
 * NOTE(review): takes 'str' by non-const reference and returns a reference,
 * presumably to 'str' itself -- confirm against the implementation.
 */
std::string &strip_end(std::string &str);

/** Returns a copy of 'str' wrapped in a leading and a trailing double quote. */
inline std::string quote(const std::string &str)
{
	std::string quoted(1, '"');
	quoted += str;
	quoted += '"';
	return quoted;
}
248 
249 /** Convert no, false, off, 0, 0.0 to false, empty to def, and others to true */
250 bool string_bool(const std::string& str,bool def=false);
251 
/** Convert into a signed value (using the Unicode "−" and +0 convention). */
std::string signed_value(int val);

/** Sign with Unicode "−" if negative. */
std::string half_signed_value(int val);

/**
 * Convert into a percentage (using the Unicode "−" and +0% convention).
 *
 * The result is signed_value(val) with a '%' appended.
 */
inline std::string signed_percent(int val)
{
	return signed_value(val) + "%";
}
260 
261 /**
262  * Convert into a string with an SI-postfix.
263  *
264  * If the unit is to be translatable,
265  * a t_string should be passed as the third argument.
266  * _("unit_byte^B") is suggested as standard.
267  *
268  * There are no default values because they would not be translatable.
269  */
270 std::string si_string(double input, bool base2, std::string unit);
271 
272 /**
273  * Try to complete the last word of 'text' with the 'wordlist'.
274  *
275  * @param[in, out] text The parameter's usage is:
276  * - Input: Text where we try to complete the last word
277  * of.
278  * - Output: Text with completed last word.
279  * @param[in, out] wordlist
280  * The parameter's usage is:
281  * - Input: A vector of strings to complete against.
282  * - Output: A vector of strings that matched 'text'.
283  *
284  * @retval true iff text is just one word (no spaces)
285  */
286 bool word_completion(std::string& text, std::vector<std::string>& wordlist);
287 
288 /** Check if a message contains a word. */
289 bool word_match(const std::string& message, const std::string& word);
290 
291 /**
292  * Match using '*' as any number of characters (including none), and '?' as any
293  * one character.
294  */
295 bool wildcard_string_match(const std::string& str, const std::string& match);
296 
297 /**
298  * Check if the username contains only valid characters.
299  *
300  * (all alpha-numeric characters plus underscore and hyphen)
301  */
302 bool isvalid_username(const std::string &login);
303 
304 /**
305  * Check if the username pattern contains only valid characters.
306  *
307  * (all alpha-numeric characters plus underscore, hyphen,
308  * question mark and asterisk)
309  */
310 bool isvalid_wildcard(const std::string &login);
311 
312 typedef std::map< std::string, t_string > string_map;
313 
314 /**
315  * Truncates a string to a given utf-8 character count and then appends an ellipsis.
316  */
317 void ellipsis_truncate(std::string& str, const size_t size);
318 
319 } // end namespace utils
320 
321 #endif
std::string si_string(double input, bool base2, std::string unit)
Convert into a string with an SI-postfix.
bool isvalid_wildcard(const std::string &username)
Check if the username pattern contains only valid characters.
std::string join_map(const T &v, const std::string &major=",", const std::string &minor=":")
std::string urlencode(const std::string &str)
Percent-escape characters in a UTF-8 string intended to be part of a URL.
std::string bullet_list(const T &v, size_t indent=4, const std::string &bullet=unicode_bullet)
Generates a new string containing a bullet list.
bool isvalid_username(const std::string &username)
Check if the username contains only valid characters.
Definition: unit.hpp:95
const GLfloat * c
Definition: glew.h:12741
GLenum GLenum GLenum input
Definition: glew.h:10668
std::map< std::string, std::string > map_split(std::string const &val, char major, char minor, int flags, std::string const &default_value)
Splits a string based on two separators into a map.
REMOVE_EMPTY : remove empty elements.
std::string unescape(const std::string &str)
Remove all escape characters (backslash)
bool wildcard_string_match(const std::string &str, const std::string &match)
Match using '*' as any number of characters (including none), and '?' as any one character.
GLuint const GLfloat * val
Definition: glew.h:2614
bool notspace(const char c)
const std::string unicode_multiplication_sign
GLenum src
Definition: glew.h:2392
const std::string number
template to number regex
std::string quote(const std::string &str)
Surround the string 'str' with double quotes.
std::string & strip(std::string &str)
Remove whitespace from the front and back of the string 'str'.
STRIP_SPACES : strips leading and trailing blank spaces.
const std::string unicode_minus
void ellipsis_truncate(std::string &str, const size_t size)
Truncates a string to a given utf-8 character count and then appends an ellipsis. ...
std::string half_signed_value(int val)
Sign with Unicode "−" if negative.
std::map< std::string, t_string > string_map
const std::string unicode_en_dash
const GLdouble * v
Definition: glew.h:1359
GLenum GLenum dst
Definition: glew.h:2392
std::string & strip_end(std::string &str)
Remove whitespace from the back of the string 'str'.
const std::string unicode_em_dash
cl_event GLbitfield flags
Definition: glew.h:3070
const std::string &parameters float amount
Definition: filter.cpp:132
lu_byte right
Definition: lparser.cpp:1020
std::string indent(const std::string &string, size_t indent_size)
Indent a block of text.
std::string join(T const &v, const std::string &s=",")
Generates a new string joining container items in a list.
std::string escape(const std::string &str, const char *special_chars)
Prepends a configurable set of characters with a backslash.
std::string login()
std::pair< int, int > parse_range(std::string const &str)
std::vector< std::string > quoted_split(std::string const &val, char c, int flags, char quote)
This function is identical to split(), except it does not split when it otherwise would if the previo...
const std::string unicode_figure_dash
GLint left
Definition: glew.h:5907
std::vector< std::pair< int, int > > parse_ranges(std::string const &str)
std::string signed_percent(int val)
Convert into a percentage (using the Unicode "−" and +0% convention.
size_t i
Definition: function.cpp:1057
bool string_bool(const std::string &str, bool def)
Convert no, false, off, 0, 0.0 to false, empty to def, and others to true.
bool isnewline(const char c)
std::string replace(std::string str, const std::string &src, const std::string &dst)
Replace all instances of src in str with dst.
std::set< std::string > set_split(std::string const &val, const char c= ',', const int flags=REMOVE_EMPTY|STRIP_SPACES)
Splits a (comma-)separated string into a set of pieces.
#define next(ls)
Definition: llex.cpp:27
GLsizeiptr size
Definition: glew.h:1649
std::vector< std::string > parenthetical_split(std::string const &val, const char separator, std::string const &left, std::string const &right, const int flags)
Splits a string based either on a separator where text within parenthesis is protected from splitting...
int apply_modifier(const int number, const std::string &amount, const int minimum)
static const char * match(MatchState *ms, const char *s, const char *p)
Definition: lstrlib.cpp:409
GLsizei GLenum GLuint GLuint GLsizei char * message
Definition: glew.h:2499
std::vector< std::string > split(std::string const &val, const char c, const int flags)
Splits a (comma-)separated string into a vector of pieces.
bool portable_isspace(const char c)
std::string signed_value(int val)
Convert into a signed value (using the Unicode "−" and +0 convention.
GLdouble s
Definition: glew.h:1358
std::string print_modifier(const std::string &mod)
std::vector< std::string > square_parenthetical_split(std::string const &val, const char separator, std::string const &left, std::string const &right, const int flags)
Similar to parenthetical_split, but also expands embedded square brackets.
GLsizei const GLcharARB ** string
Definition: glew.h:4503
bool word_completion(std::string &text, std::vector< std::string > &wordlist)
Try to complete the last word of 'text' with the 'wordlist'.
const std::string unicode_bullet
bool word_match(const std::string &message, const std::string &word)
Check if a message contains a word.