The Battle for Wesnoth  1.13.4+dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tokenizer.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2004 - 2009 by Philippe Plantier <[email protected]>
3  Copyright (C) 2010 - 2016 by Guillaume Melquiond <[email protected]>
4  Part of the Battle for Wesnoth Project http://www.wesnoth.org
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 #ifndef TOKENIZER_H_INCLUDED
17 #define TOKENIZER_H_INCLUDED
18 
19 //#define DEBUG_TOKENIZER
20 
21 #include "buffered_istream.hpp"
22 
23 #include <istream>
24 #include <string>
25 
26 class config;
27 
/**
 * A token record holding a `type` and a `value` member, both initialised by
 * the default constructor.
 * NOTE(review): this listing omits several source lines of the struct (the
 * embedded numbering jumps 34->36, 36->41, 47->49, 49->51 and 52->55), so the
 * enum header, some enumerators and the member declarations are not visible
 * here -- consult the real header before editing.
 */
28 struct token
29 {
    // Default construction: type is END, value is value-initialised.
30  token() :
31  type(END),
32  value()
33  {}
34 
    // Body of the token-type enum. Each enumerator visible below is assigned
    // the character literal it is named after.
36  {
41 
42  LF = '\n',
43  EQUALS = '=',
44  COMMA = ',',
45  PLUS = '+',
46  SLASH = '/',
47  OPEN_BRACKET = '[',
49  UNDERSCORE = '_',
51  };
52 
55 };
56 
57 /** Abstract base class for the tokenizer. */
// NOTE(review): no virtual member is visible in this listing -- confirm
// whether "abstract" still describes the class.
// NOTE(review): the listing skips source lines 97, 106, 138-140, 170-173 and
// 177, so some member declarations used below are not shown here.
58 class tokenizer
59 {
60 public:
    // Constructs a tokenizer from an input stream (defined out of line).
61  tokenizer(std::istream& in);
62  ~tokenizer();
63 
    // Defined out of line; presumably advances to the next token and returns
    // it -- TODO confirm against the implementation file.
64  const token &next_token();
65 
    /** Returns a reference to the stored token_ member without modifying it. */
66  const token &current_token() const
67  {
68  return token_;
69  }
70 
71 #ifdef DEBUG_TOKENIZER
    /** Returns previous_token_; only compiled when DEBUG_TOKENIZER is defined. */
72  const token &previous_token() const
73  {
74  return previous_token_;
75  }
76 #endif
77 
    /** Returns a reference to the textdomain_ member. */
78  const std::string &textdomain() const
79  {
80  return textdomain_;
81  }
82 
    /** Returns a reference to the file_ member. */
83  const std::string &get_file() const
84  {
85  return file_;
86  }
87 
    /** Returns the startlineno_ member. */
88  int get_start_line() const
89  {
90  return startlineno_;
91  }
92 
93 private:
    // Default constructor is declared private, so outside code cannot
    // default-construct a tokenizer.
94  tokenizer();
    // Most recent value obtained from in_.get() (see next_char_fast()).
95  int current_;
    // Line counter; incremented by next_char() each time it moves past a '\n'.
96  int lineno_;
98 
    /**
     * Advances to the next character, bumping lineno_ first when the
     * character being left behind is a newline.
     */
99  void next_char()
100  {
101  if (UNLIKELY(current_ == '\n'))
102  ++lineno_;
103  next_char_fast();
104  }
105 
    // Body of next_char_fast(), the function called by next_char() above; its
    // signature line is missing from this listing.
    // Reads from in_ into current_, repeating while the value read is '\r',
    // so carriage returns never end up in current_. Does not touch lineno_.
107  {
108  do {
109  current_ = in_.get();
110  } while (UNLIKELY(current_ == '\r'));
    // The region below is compiled out by `#if 0`: an alternative that checks
    // in_.good() before each read and skips at most one '\r'.
111 #if 0
112  /// @todo disabled until campaign server is fixed
113  if(LIKELY(in_.good())) {
114  current_ = in_.get();
115  if (UNLIKELY(current_ == '\r'))
116  {
117  // we assume that there is only one '\r'
118  if(LIKELY(in_.good())) {
119  current_ = in_.get();
120  } else {
121  current_ = EOF;
122  }
123  }
124  } else {
125  current_ = EOF;
126  }
127 #endif
128  }
129 
    /** Returns in_.peek(); current_ is left unchanged. */
130  int peek_char()
131  {
132  return in_.peek();
133  }
134 
    // Character-class flags combined in char_types_. Only TOK_NONE is visible
    // here; TOK_SPACE, TOK_NUMERIC and TOK_ALPHA, used by the predicates
    // below, are on source lines missing from this listing.
135  enum
136  {
137  TOK_NONE = 0,
141  };
142 
    /**
     * Looks up the class flags of @a c in char_types_; any value of 128 or
     * above yields 0. The parameter is unsigned, so a negative int passed by
     * the predicates below (e.g. EOF) converts to a large value and also
     * yields 0 instead of indexing out of bounds.
     */
143  int char_type(unsigned c) const
144  {
145  return c < 128 ? char_types_[c] : 0;
146  }
147 
    /** True when every bit of TOK_SPACE is set in the flags of @a c. */
148  bool is_space(int c) const
149  {
150  return (char_type(c) & TOK_SPACE) == TOK_SPACE;
151  }
152 
    /** True when every bit of TOK_NUMERIC is set in the flags of @a c. */
153  bool is_num(int c) const
154  {
155  return (char_type(c) & TOK_NUMERIC) == TOK_NUMERIC;
156  }
157 
    /** True when at least one TOK_ALPHA or TOK_NUMERIC bit is set for @a c. */
158  bool is_alnum(int c) const
159  {
160  return (char_type(c) & (TOK_ALPHA | TOK_NUMERIC)) != TOK_NONE;
161  }
162 
    // Defined out of line; presumably consumes the rest of a comment --
    // TODO confirm against the implementation file.
163  void skip_comment();
164 
165  /**
166  * Returns true if the next characters are the ones from @a cmd
167  * followed by a space. Skips all the matching characters.
168  */
169  bool skip_command(char const *cmd);
170 
174 #ifdef DEBUG_TOKENIZER
    // Backing store for previous_token(); exists only in DEBUG_TOKENIZER builds.
175  token previous_token_;
176 #endif
    // Per-character class flags, indexed by character code below 128
    // (see char_type()).
178  char char_types_[128];
179 };
180 
181 #endif
182 
void skip_comment()
Definition: tokenizer.cpp:178
void next_char()
Definition: tokenizer.hpp:99
token()
Definition: tokenizer.hpp:30
int get_start_line() const
Definition: tokenizer.hpp:88
int startlineno_
Definition: tokenizer.hpp:97
Abstract baseclass for the tokenizer.
Definition: tokenizer.hpp:58
const GLfloat * c
Definition: glew.h:12741
#define UNLIKELY(a)
Definition: util.hpp:426
GLuint GLuint GLsizei GLenum type
Definition: glew.h:1221
const std::string & textdomain() const
Definition: tokenizer.hpp:78
int peek()
Gets a character from the buffer.
Helper class for buffering a std::istream.
void next_char_fast()
Definition: tokenizer.hpp:106
GLuint in
Definition: glew.h:9261
const token & next_token()
Definition: tokenizer.cpp:55
int get()
Gets and consumes a character from the buffer.
token_type
Definition: tokenizer.hpp:35
GLsizei const GLfloat * value
Definition: glew.h:1817
bool is_alnum(int c) const
Definition: tokenizer.hpp:158
std::string file_
Definition: tokenizer.hpp:172
bool is_space(int c) const
Definition: tokenizer.hpp:148
int lineno_
Definition: tokenizer.hpp:96
int char_type(unsigned c) const
Definition: tokenizer.hpp:143
const token & current_token() const
Definition: tokenizer.hpp:66
Helper class for buffering a std::istream.
bool is_num(int c) const
Definition: tokenizer.hpp:153
char char_types_[128]
Definition: tokenizer.hpp:178
int peek_char()
Definition: tokenizer.hpp:130
std::string textdomain_
Definition: tokenizer.hpp:171
int current_
Definition: tokenizer.hpp:95
#define c
Definition: glew.h:12743
buffered_istream in_
Definition: tokenizer.hpp:177
const std::string & get_file() const
Definition: tokenizer.hpp:83
A config object defines a single node in a WML file, with access to child nodes.
Definition: config.hpp:83
token token_
Definition: tokenizer.hpp:173
bool skip_command(char const *cmd)
Returns true if the next characters are the one from cmd followed by a space.
Definition: tokenizer.cpp:166
GLsizei const GLcharARB ** string
Definition: glew.h:4503
std::string value
Definition: tokenizer.hpp:54
token_type type
Definition: tokenizer.hpp:53
#define LIKELY(a)
Definition: util.hpp:425