The Battle for Wesnoth  1.13.4+dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
parser.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2003 by David White <[email protected]>
3  Copyright (C) 2005 by Guillaume Melquiond <[email protected]>
4  Copyright (C) 2005 - 2016 by Philippe Plantier <[email protected]>
5  Part of the Battle for Wesnoth Project http://www.wesnoth.org/
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY.
13 
14  See the COPYING file for more details.
15 */
16 
17 /**
18  * @file
19  * Read/Write & analyze WML- and config-files.
20  */
21 
22 
23 #include "serialization/parser.hpp"
24 
25 #include "config.hpp"
26 #include "log.hpp"
27 #include "gettext.hpp"
28 #include "wesconfig.h"
33 
34 #include <stack>
35 
36 #include <boost/algorithm/string/replace.hpp>
37 #include <boost/iostreams/filtering_stream.hpp>
38 #include <boost/iostreams/filter/bzip2.hpp>
39 #include <boost/iostreams/filter/gzip.hpp>
40 #include <boost/variant/static_visitor.hpp>
41 
42 static lg::log_domain log_config("config");
43 #define ERR_CF LOG_STREAM(err, log_config)
44 #define WRN_CF LOG_STREAM(warn, log_config)
45 #define LOG_CF LOG_STREAM(info, log_config)
46 
47 static const size_t max_recursion_levels = 1000;
48 
49 namespace {
50 class parser
51 {
52  parser();
53  parser(const parser&);
54  parser& operator=(const parser&);
55 public:
56  parser(config& cfg, std::istream& in,
57  abstract_validator * validator = nullptr);
58  ~parser();
59  void operator()();
60 
61 private:
62  void parse_element();
63  void parse_variable();
65  const std::string &error_string,
66  const std::string &hint_string = "",
67  const std::string &debug_string = "");
68  void error(const std::string& message, const std::string& pos_format = "");
69 
70  config& cfg_;
71  tokenizer tok_;
72  abstract_validator *validator_;
73 
74  struct element {
75  element(config *cfg, std::string const &name,
76  int start_line = 0, const std::string &file = "") :
77  cfg(cfg), name(name), start_line(start_line), file(file)
78  {}
79 
80  config* cfg;
82  int start_line;
83  std::string file;
84  };
85 
86  std::stack<element> elements;
87 };
88 
89 parser::parser(config &cfg, std::istream &in, abstract_validator * validator)
90  :cfg_(cfg),
91  tok_(in),
92  validator_(validator),
93  elements()
94 {
95 }
96 
97 
98 parser::~parser()
99 {}
100 
101 void parser::operator()()
102 {
103  cfg_.clear();
104  elements.push(element(&cfg_, ""));
105 
106  do {
107  tok_.next_token();
108 
109  switch(tok_.current_token().type) {
110  case token::LF:
111  continue;
112  case '[':
113  parse_element();
114  break;
115  case token::STRING:
116  parse_variable();
117  break;
118  default:
119  if (static_cast<unsigned char>(tok_.current_token().value[0]) == 0xEF &&
120  static_cast<unsigned char>(tok_.next_token().value[0]) == 0xBB &&
121  static_cast<unsigned char>(tok_.next_token().value[0]) == 0xBF)
122  {
123  utils::string_map i18n_symbols;
124  std::stringstream ss;
125  ss << tok_.get_start_line() << " " << tok_.get_file();
126  ERR_CF << lineno_string(i18n_symbols,
127  ss.str(),
128  "Skipping over a utf8 BOM at $pos")
129  << '\n';
130  } else {
131  error(_("Unexpected characters at line start"));
132  }
133  break;
134  case token::END:
135  break;
136  }
137  } while (tok_.current_token().type != token::END);
138 
139  // The main element should be there. If it is not, this is a parser error.
140  assert(!elements.empty());
141 
142  if(elements.size() != 1) {
143  utils::string_map i18n_symbols;
144  i18n_symbols["tag"] = elements.top().name;
145  std::stringstream ss;
146  ss << elements.top().start_line << " " << elements.top().file;
147  error(lineno_string(i18n_symbols, ss.str(),
148  _("Missing closing tag for tag [$tag]"),
149  _("expected at $pos")), _("opened at $pos"));
150  }
151 }
152 
153 void parser::parse_element()
154 {
155  tok_.next_token();
156  std::string elname;
157  config* current_element = nullptr;
158  switch(tok_.current_token().type) {
159  case token::STRING: // [element]
160  elname = tok_.current_token().value;
161  if (tok_.next_token().type != ']')
162  error(_("Unterminated [element] tag"));
163  // Add the element
164  current_element = &(elements.top().cfg->add_child(elname));
165  elements.push(element(current_element, elname, tok_.get_start_line(), tok_.get_file()));
166  if (validator_){
167  validator_->open_tag(elname,tok_.get_start_line(),
168  tok_.get_file());
169  }
170  break;
171 
172  case '+': // [+element]
173  if (tok_.next_token().type != token::STRING)
174  error(_("Invalid tag name"));
175  elname = tok_.current_token().value;
176  if (tok_.next_token().type != ']')
177  error(_("Unterminated [+element] tag"));
178 
179  // Find the last child of the current element whose name is
180  // element
181  if (config &c = elements.top().cfg->child(elname, -1)) {
182  current_element = &c;
183  if (validator_){
184  validator_->open_tag(elname,tok_.get_start_line(),
185  tok_.get_file(),true);
186  }
187  } else {
188  current_element = &elements.top().cfg->add_child(elname);
189  if (validator_){
190  validator_->open_tag(elname,tok_.get_start_line(),
191  tok_.get_file());
192  }
193  }
194  elements.push(element(current_element, elname, tok_.get_start_line(), tok_.get_file()));
195  break;
196 
197  case '/': // [/element]
198  if(tok_.next_token().type != token::STRING)
199  error(_("Invalid closing tag name"));
200  elname = tok_.current_token().value;
201  if(tok_.next_token().type != ']')
202  error(_("Unterminated closing tag"));
203  if(elements.size() <= 1)
204  error(_("Unexpected closing tag"));
205  if(elname != elements.top().name) {
206  utils::string_map i18n_symbols;
207  i18n_symbols["tag1"] = elements.top().name;
208  i18n_symbols["tag2"] = elname;
209  std::stringstream ss;
210  ss << elements.top().start_line << " " << elements.top().file;
211  error(lineno_string(i18n_symbols, ss.str(),
212  _("Found invalid closing tag [/$tag2] for tag [$tag1]"),
213  _("opened at $pos")), _("closed at $pos"));
214  }
215  if(validator_){
216  element & el= elements.top();
217  validator_->validate(*el.cfg,el.name,el.start_line,el.file);
218  validator_->close_tag();
219  }
220  elements.pop();
221  break;
222  default:
223  error(_("Invalid tag name"));
224  }
225 }
226 
227 void parser::parse_variable()
228 {
229  config& cfg = *elements.top().cfg;
230  std::vector<std::string> variables;
231  variables.push_back("");
232 
233  while (tok_.current_token().type != '=') {
234  switch(tok_.current_token().type) {
235  case token::STRING:
236  if(!variables.back().empty())
237  variables.back() += ' ';
238  variables.back() += tok_.current_token().value;
239  break;
240  case ',':
241  if(variables.back().empty()) {
242  error(_("Empty variable name"));
243  } else {
244  variables.push_back("");
245  }
246  break;
247  default:
248  error(_("Unexpected characters after variable name (expected , or =)"));
249  break;
250  }
251  tok_.next_token();
252  }
253  if(variables.back().empty())
254  error(_("Empty variable name"));
255 
257 
258  std::vector<std::string>::const_iterator curvar = variables.begin();
259 
260  bool ignore_next_newlines = false, previous_string = false;
261  while(1) {
262  tok_.next_token();
263  assert(curvar != variables.end());
264 
265  switch (tok_.current_token().type) {
266  case ',':
267  if ((curvar+1) != variables.end()) {
268  if (buffer.translatable())
269  cfg[*curvar] = t_string(buffer);
270  else
271  cfg[*curvar] = buffer.value();
272  if(validator_){
273  validator_->validate_key (cfg,*curvar,buffer.value(),
274  tok_.get_start_line (),
275  tok_.get_file ());
276  }
277  buffer = t_string_base();
278  ++curvar;
279  } else {
280  buffer += ",";
281  }
282  break;
283  case '_':
284  tok_.next_token();
285  switch (tok_.current_token().type) {
287  error(_("Unterminated quoted string"));
288  break;
289  case token::QSTRING:
290  buffer += t_string_base(tok_.current_token().value, tok_.textdomain());
291  break;
292  default:
293  buffer += "_";
294  buffer += tok_.current_token().value;
295  break;
296  case token::END:
297  case token::LF:
298  buffer += "_";
299  goto finish;
300  }
301  break;
302  case '+':
303  ignore_next_newlines = true;
304  continue;
305  case token::STRING:
306  if (previous_string) buffer += " ";
307  //nobreak
308  default:
309  buffer += tok_.current_token().value;
310  break;
311  case token::QSTRING:
312  buffer += tok_.current_token().value;
313  break;
315  error(_("Unterminated quoted string"));
316  break;
317  case token::LF:
318  if (ignore_next_newlines) continue;
319  //nobreak
320  case token::END:
321  goto finish;
322  }
323 
324  previous_string = tok_.current_token().type == token::STRING;
325  ignore_next_newlines = false;
326  }
327 
328  finish:
329  if (buffer.translatable())
330  cfg[*curvar] = t_string(buffer);
331  else
332  cfg[*curvar] = buffer.value();
333  if(validator_){
334  validator_->validate_key (cfg,*curvar,buffer.value(),
335  tok_.get_start_line (),
336  tok_.get_file ());
337  }
338  while (++curvar != variables.end()) {
339  cfg[*curvar] = "";
340  }
341 }
342 
343 /**
344  * This function is crap. Don't use it on a string_map with prefixes.
345  */
347  std::string const &lineno,
348  std::string const &error_string,
349  std::string const &hint_string,
350  std::string const &debug_string)
351 {
352  i18n_symbols["pos"] = ::lineno_string(lineno);
353  std::string result = error_string;
354 
355  if(!hint_string.empty()) {
356  result += '\n' + hint_string;
357  }
358 
359  if(!debug_string.empty()) {
360  result += '\n' + debug_string;
361  }
362 
363  for(utils::string_map::value_type& var : i18n_symbols)
364  boost::algorithm::replace_all(result, std::string("$") + var.first, std::string(var.second));
365  return result;
366 }
367 
368 void parser::error(const std::string& error_type, const std::string& pos_format)
369 {
370  std::string hint_string = pos_format;
371 
372  if(hint_string.empty()) {
373  hint_string = _("at $pos");
374  }
375 
376  utils::string_map i18n_symbols;
377  i18n_symbols["error"] = error_type;
378 
379  std::stringstream ss;
380  ss << tok_.get_start_line() << " " << tok_.get_file();
381 
382 #ifdef DEBUG_TOKENIZER
383  i18n_symbols["value"] = tok_.current_token().value;
384  i18n_symbols["previous_value"] = tok_.previous_token().value;
385 
386  const std::string& tok_state =
387  _("Value: '$value' Previous: '$previous_value'");
388 #else
389  const std::string& tok_state = "";
390 #endif
391 
392  const std::string& message =
393  lineno_string(i18n_symbols, ss.str(), "$error", hint_string, tok_state);
394 
395  throw config::error(message);
396 }
397 
398 } // end anon namespace
399 
400 void read(config &cfg, std::istream &in, abstract_validator * validator)
401 {
402  parser(cfg, in, validator)();
403 }
404 
405 void read(config &cfg, const std::string &in, abstract_validator * validator)
406 {
407  std::istringstream ss(in);
408  parser(cfg, ss, validator)();
409 }
410 
411 template <typename decompressor>
412 void read_compressed(config &cfg, std::istream &file, abstract_validator * validator)
413 {
414  //an empty gzip file seems to confuse boost on msvc
415  //so return early if this is the case
416  if (file.peek() == EOF) {
417  return;
418  }
419  boost::iostreams::filtering_stream<boost::iostreams::input> filter;
420  filter.push(decompressor());
421  filter.push(file);
422 
423 
424 
425  // This causes especially gzip_error (and the corresponding bz2 error), std::ios_base::failure to be thrown here.
426  // save_index_class::data expects that and config_cache::read_cache and other functions are also capable of catching.
427  // Note that parser(cuff, filter,validator)(); -> tokenizer::tokenizer can throw exeptions too (meaning this functions did already throw these exceptions before this patch).
428  // We try to fix https://svn.boost.org/trac/boost/ticket/5237 by not creating empty gz files.
429  filter.exceptions(filter.exceptions() | std::ios_base::badbit);
430 
431  /*
432  * It sometimes seems the file is not empty but still no real data.
433  * Filter that case here. It might be previous test is no longer required
434  * but simply keep it.
435  */
436 
437  // on msvc filter.peek() != EOF does not imply filter.good().
438  // we never create empty compressed gzip files because boosts gzip fails at doing that.
439  // but empty compressed bz2 files are possible.
440  if(filter.peek() == EOF) {
441  LOG_CF << "Empty compressed file or error at reading a compressed file.";
442  return;
443  }
444 
445 
446  if(!filter.good()) {
447  LOG_CF << " filter.peek() != EOF but !filter.good(), this indicates a malformed gz stream, and can make wesnoth crash.";
448  }
449 
450  parser(cfg, filter,validator)();
451 }
452 
453 /// might throw a std::ios_base::failure especially a gzip_error
454 void read_gz(config &cfg, std::istream &file, abstract_validator * validator)
455 {
456  read_compressed<boost::iostreams::gzip_decompressor>(cfg, file, validator);
457 }
458 
459 /// might throw a std::ios_base::failure especially bzip2_error
460 void read_bz2(config &cfg, std::istream &file, abstract_validator * validator)
461 {
462  read_compressed<boost::iostreams::bzip2_decompressor>(cfg, file, validator);
463 }
464 
465 namespace { // helpers for write_key_val().
/**
 * Copies a string fragment and converts it to a suitable format for WML.
 * (I.e., quotes are doubled.)
 *
 * @param begin  Start of the fragment.
 * @param end    One past the last character of the fragment.
 * @return The fragment with every '"' written twice.
 */
std::string escaped_string(const std::string::const_iterator &begin,
		const std::string::const_iterator &end)
{
	std::string res;
	// The result is at least as long as the input; doubling only adds to it.
	res.reserve(static_cast<std::string::size_type>(end - begin));

	for(std::string::const_iterator iter = begin; iter != end; ++iter) {
		const char c = *iter;
		res.append(c == '"' ? 2 : 1, c);
	}
	return res;
}

/**
 * Copies a string and converts it to a suitable format for WML.
 * (I.e., quotes are doubled.)
 */
inline std::string escaped_string(const std::string &value)
{
	return escaped_string(value.begin(), value.end());
}
490 
491  class write_key_val_visitor : public boost::static_visitor<void>
492  {
493  std::ostream &out_;
494  const unsigned level_;
495  std::string &textdomain_;
496  const std::string &key_;
497 
498  public:
499  write_key_val_visitor(std::ostream &out, unsigned level,
500  std::string &textdomain, const std::string &key)
501  : out_(out), level_(level), textdomain_(textdomain), key_(key)
502  {}
503 
504  // Generic visitor just streams "key=value".
505  template <typename T> void operator()(T const & v) const
506  { indent(); out_ << key_ << '=' << v << '\n'; }
507 
508  // Specialized visitors for things that go in quotes:
509  void operator()(boost::blank const &) const
510  { /* treat blank values as nonexistent which fits better than treating them as empty strings.*/ }
511  void operator()(std::string const &s) const
512  { indent(); out_ << key_ << '=' << '"' << escaped_string(s) << '"' << '\n'; }
513  void operator()(t_string const &s) const;
514 
515  private:
516  void indent() const
517  { for ( unsigned i = 0; i < level_; ++i ) out_ << '\t'; }
518  };
519 
520  /**
521  * Writes all the parts of a translatable string.
522  * @note If the first part is translatable and in the wrong textdomain,
523  * the textdomain change has to happen before the attribute name.
524  * That is the reason for not outputting the key beforehand and
525  * letting this function do it.
526  */
527  void write_key_val_visitor::operator()(t_string const &value) const
528  {
529  bool first = true;
530 
531  for (t_string::walker w(value); !w.eos(); w.next())
532  {
533  if (!first)
534  out_ << " +\n";
535 
536  if (w.translatable() && w.textdomain() != textdomain_) {
537  textdomain_ = w.textdomain();
538  out_ << "#textdomain " << textdomain_ << '\n';
539  }
540 
541  indent();
542 
543  if (first)
544  out_ << key_ << '=';
545  else
546  out_ << '\t';
547 
548  if (w.translatable())
549  out_ << '_';
550 
551  out_ << '"' << escaped_string(w.begin(), w.end()) << '"';
552  first = false;
553  }
554  out_ << '\n';
555  }
556 }//unnamed namespace for write_key_val() helpers.
557 
558 void write_key_val(std::ostream &out, const std::string &key,
559  const config::attribute_value &value, unsigned level,
560  std::string& textdomain)
561 {
562  value.apply_visitor(write_key_val_visitor(out, level, textdomain, key));
563 }
564 
/**
 * Writes an opening tag line, "[child]", indented by level tabs.
 *
 * @param out    Destination stream.
 * @param child  Tag name.
 * @param level  Indentation depth, in tabs.
 */
void write_open_child(std::ostream &out, const std::string &child, unsigned int level)
{
	const std::string indent(level, '\t');
	out << indent << '[' << child << "]\n";
}
569 
/**
 * Writes a closing tag line, "[/child]", indented by level tabs.
 *
 * @param out    Destination stream.
 * @param child  Tag name.
 * @param level  Indentation depth, in tabs.
 */
void write_close_child(std::ostream &out, const std::string &child, unsigned int level)
{
	const std::string indent(level, '\t');
	out << indent << "[/" << child << "]\n";
}
574 
575 static void write_internal(config const &cfg, std::ostream &out, std::string& textdomain, size_t tab = 0)
576 {
577  if (tab > max_recursion_levels)
578  throw config::error("Too many recursion levels in config write");
579 
580  for (const config::attribute &i : cfg.attribute_range()) {
581  if (!config::valid_id(i.first)) {
582  ERR_CF << "Config contains invalid attribute name '" << i.first << "', skipping...\n";
583  continue;
584  }
585  write_key_val(out, i.first, i.second, tab, textdomain);
586  }
587 
588  for (const config::any_child &item : cfg.all_children_range())
589  {
590  if (!config::valid_id(item.key)) {
591  ERR_CF << "Config contains invalid tag name '" << item.key << "', skipping...\n";
592  continue;
593  }
594  write_open_child(out, item.key, tab);
595  write_internal(item.cfg, out, textdomain, tab + 1);
596  write_close_child(out, item.key, tab);
597  }
598 }
599 
600 static void write_internal(configr_of const &cfg, std::ostream &out, std::string& textdomain, size_t tab = 0)
601 {
602  if (tab > max_recursion_levels)
603  throw config::error("Too many recursion levels in config write");
604  if (cfg.data_) {
605  write_internal(*cfg.data_, out, textdomain, tab);
606  }
607 
608  for (const auto &pair: cfg.subtags_)
609  {
610  assert(pair.first && pair.second);
611  if (!config::valid_id(*pair.first)) {
612  ERR_CF << "Config contains invalid tag name '" << *pair.first << "', skipping...\n";
613  continue;
614  }
615  write_open_child(out, *pair.first, tab);
616  write_internal(*pair.second, out, textdomain, tab + 1);
617  write_close_child(out, *pair.first, tab);
618  }
619 }
620 
621 void write(std::ostream &out, configr_of const &cfg, unsigned int level)
622 {
623  std::string textdomain = PACKAGE;
624  write_internal(cfg, out, textdomain, level);
625 }
626 
627 template <typename compressor>
628 void write_compressed(std::ostream &out, configr_of const &cfg)
629 {
630  boost::iostreams::filtering_stream<boost::iostreams::output> filter;
631  filter.push(compressor());
632  filter.push(out);
633 
634  write(filter, cfg);
635  // prevent empty gz files because of https://svn.boost.org/trac/boost/ticket/5237
636  filter << "\n";
637 }
638 
639 void write_gz(std::ostream &out, configr_of const &cfg)
640 {
641  write_compressed<boost::iostreams::gzip_compressor>(out, cfg);
642 }
643 
644 void write_bz2(std::ostream &out, configr_of const &cfg)
645 {
646  write_compressed<boost::iostreams::bzip2_compressor>(out, cfg);
647 }
std::string lineno_string(const std::string &lineno)
#define ERR_CF
Definition: parser.cpp:43
void write_gz(std::ostream &out, configr_of const &cfg)
Definition: parser.cpp:639
void write_compressed(std::ostream &out, configr_of const &cfg)
Definition: parser.cpp:628
GLint level
Definition: glew.h:1220
Abstract baseclass for the tokenizer.
Definition: tokenizer.hpp:58
bool translatable() const
Definition: tstring.hpp:96
const GLfloat * c
Definition: glew.h:12741
static lg::log_domain log_config("config")
static l_noret error(LoadState *S, const char *why)
Definition: lundump.cpp:29
void write_key_val(std::ostream &out, const std::string &key, const config::attribute_value &value, unsigned level, std::string &textdomain)
Definition: parser.cpp:558
void write_bz2(std::ostream &out, configr_of const &cfg)
Definition: parser.cpp:644
attribute_map::value_type attribute
Definition: config.hpp:393
V::result_type apply_visitor(const V &visitor) const
Applies a visitor to the underlying variant.
Definition: config.hpp:377
This file contains information about validation abstract level interface.
void write_open_child(std::ostream &out, const std::string &child, unsigned int level)
Definition: parser.cpp:565
Definitions for the interface to Wesnoth Markup Language (WML).
Variant for storing WML attributes.
Definition: config.hpp:223
expression_ptr key_
Definition: formula.cpp:435
void read_gz(config &cfg, std::istream &file, abstract_validator *validator)
might throw a std::ios_base::failure especially a gzip_error
Definition: parser.cpp:454
GLuint in
Definition: glew.h:9261
void write_close_child(std::ostream &out, const std::string &child, unsigned int level)
Definition: parser.cpp:570
static bool valid_id(const std::string &id)
Definition: config.cpp:498
Used in parsing config file.
Definition: validator.hpp:36
static UNUSEDNOWARN std::string _(const char *str)
Definition: gettext.hpp:82
#define LOG_CF
Definition: parser.cpp:45
GLuint GLuint end
Definition: glew.h:1221
GLuint64EXT * result
Definition: glew.h:10727
std::map< std::string, t_string > string_map
GLubyte GLubyte GLubyte GLubyte w
Definition: glew.h:1858
const GLdouble * v
Definition: glew.h:1359
GLsizei const GLfloat * value
Definition: glew.h:1817
all_children_itors all_children_range() const
In-order iteration over all children.
Definition: config.cpp:1127
config & add_child(const std::string &key)
Definition: config.cpp:743
void read_bz2(config &cfg, std::istream &file, abstract_validator *validator)
might throw a std::ios_base::failure especially bzip2_error
Definition: parser.cpp:460
void read_compressed(config &cfg, std::istream &file, abstract_validator *validator)
Definition: parser.cpp:412
const config * data_
static int indent
Definition: log.cpp:45
GLuint buffer
Definition: glew.h:1648
#define PACKAGE
Definition: wesconfig.h:23
Some defines: VERSION, PACKAGE, MIN_SAVEGAME_VERSION.
GLuint res
Definition: glew.h:9258
const_attr_itors attribute_range() const
Definition: config.cpp:984
static void write_internal(config const &cfg, std::ostream &out, std::string &textdomain, size_t tab=0)
Definition: parser.cpp:575
size_t i
Definition: function.cpp:1057
void read(config &cfg, std::istream &in, abstract_validator *validator)
Definition: parser.cpp:400
const std::string & value() const
Definition: tstring.hpp:99
GLuint const GLchar * name
Definition: glew.h:1782
std::vector< std::pair< const std::string *, const configr_of * > > subtags_
GLint GLint GLint GLint GLint GLint GLint GLbitfield GLenum filter
Definition: glew.h:3448
static const size_t max_recursion_levels
Definition: parser.cpp:47
config & child(const std::string &key, int n=0)
Returns the nth child with the given key, or a reference to an invalid config if there is none...
Definition: config.cpp:658
Standard logging facilities (interface).
GLint * first
Definition: glew.h:1496
GLsizei GLenum GLuint GLuint GLsizei char * message
Definition: glew.h:2499
#define c
Definition: glew.h:12743
A config object defines a single node in a WML file, with access to child nodes.
Definition: config.hpp:83
GLdouble s
Definition: glew.h:1358
void write(std::ostream &out, configr_of const &cfg, unsigned int level)
Definition: parser.cpp:621
GLsizei const GLcharARB ** string
Definition: glew.h:4503