The Battle for Wesnoth  1.13.4+dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tokenizer.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2004 - 2009 by Philippe Plantier <[email protected]>
3  Copyright (C) 2010 - 2016 by Guillaume Melquiond <[email protected]>
4  Part of the Battle for Wesnoth Project http://www.wesnoth.org
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY.
12 
13  See the COPYING file for more details.
14 */
15 
16 /** @file */
17 
18 #include "global.hpp"
19 
20 #include "wesconfig.h"
22 
23 
/**
 * Constructs a tokenizer that reads from @a in.
 *
 * Sets the starting state (no current character, line 1, text domain
 * PACKAGE), fills the character classification table for the 128 ASCII
 * codes and makes the wrapped stream throw when badbit gets set.
 *
 * NOTE(review): listing line 46 is absent from this extract, so the
 * constructor may do more than what is visible here - confirm against the
 * real source file.
 *
 * @param in  Stream the tokenizer reads from; it is handed to in_.
 */
24 tokenizer::tokenizer(std::istream& in) :
25  current_(EOF),
26  lineno_(1),
27  startlineno_(0),
28  textdomain_(PACKAGE),
29  file_(),
30  token_(),
31  in_(in)
32 {
 // Classify every 7-bit character once, so later lookups are a plain
 // index into char_types_.
33  for (int c = 0; c < 128; ++c)
34  {
 // 0 means "none of the classes below".
35  int t = 0;
 // Letters and the underscore count as alphabetic.
36  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
37  t = TOK_ALPHA;
38  } else if (c >= '0' && c <= '9') {
39  t = TOK_NUMERIC;
 // Only blank and tab are spaces; '\n' is deliberately not included.
40  } else if (c == ' ' || c == '\t') {
41  t = TOK_SPACE;
42  }
43  char_types_[c] = t;
44  }
 // Ask the underlying stream to throw an exception when badbit is set.
45  in_.stream().exceptions(std::ios_base::badbit);
47 }
48 
// NOTE(review): the signature line (listing line 49) is missing from this
// extract; this body presumably belongs to tokenizer::~tokenizer() - confirm.
// It resets the wrapped stream: the error state is cleared and the exception
// mask is set back to "throw on nothing", undoing the badbit mask installed
// by the constructor.
50 {
51  in_.stream().clear(std::ios_base::goodbit);
52  in_.stream().exceptions(std::ios_base::goodbit);
53 }
54 
// NOTE(review): the signature (listing line 55; the cross-reference index
// gives it as `const token & next_token()`) and a number of body lines
// (66, 72, 78, 82, 87-88, 91-92, 96, 108, 112, 129-130, 135-136, 143, 153)
// are missing from this extract. The comments below describe only the
// statements that are visible.
//
// Reads the next token from the input into token_ and returns a reference
// to it.
56 {
57 #if DEBUG_TOKENIZER
58  previous_token_ = token_;
59 #endif
 // Start the new token with an empty value.
60  token_.value.clear();
61 
62  // Dump spaces and inlined comments
63  for(;;)
64  {
65  while (is_space(current_)) {
67  }
 // Character code 254 introduces an inlined comment (it is handed to
 // skip_comment() below); anything else ends the skipping loop.
68  if (current_ != 254)
69  break;
70  skip_comment();
71  // skip the line end
73  }
74 
 // A '#' starts a regular comment, which is skipped as well.
75  if (current_ == '#')
76  skip_comment();
77 
79 
 // Dispatch on the first significant character.
80  switch(current_) {
81  case EOF:
83  break;
84 
85  case '<':
 // Only "<<" opens a delimited string; a single '<' is handled by the
 // (missing) lines inside this if.
86  if (peek_char() != '<') {
89  break;
90  }
 // Append everything up to the closing ">>" (or end of input) to the
 // token value verbatim.
93  for (;;) {
94  next_char();
95  if (current_ == EOF) {
97  break;
98  }
99  if (current_ == '>' && peek_char() == '>') {
 // Consume the first '>' here; the second one is consumed by the
 // next_char() at the end of the function.
100  next_char_fast();
101  break;
102  }
103  token_.value += current_;
104  }
105  break;
106 
107  case '"':
 // Quoted string: collect characters until the closing quote or end
 // of input.
109  for (;;) {
110  next_char();
111  if (current_ == EOF) {
113  break;
114  }
115  if (current_ == '"') {
 // A lone quote ends the string; a doubled quote ("") stands for one
 // literal quote, which is appended below.
116  if (peek_char() != '"') break;
117  next_char_fast();
118  }
 // An inlined comment inside the string is dropped from the value.
119  if (current_ == 254) {
120  skip_comment();
 // NOTE(review): presumably compensates for the line end that the
 // next_char() at the top of the loop will count - confirm against
 // next_char() in tokenizer.hpp.
121  --lineno_;
122  continue;
123  }
124  token_.value += current_;
125  }
126  break;
127 
 // Single-character punctuation tokens (the lines setting the token
 // are missing from this extract).
128  case '[': case ']': case '/': case '\n': case '=': case ',': case '+':
131  break;
132 
133  case '_':
 // An underscore not followed by an alphanumeric character is handled
 // on its own (lines missing); otherwise it starts an identifier.
134  if (!is_alnum(peek_char())) {
137  break;
138  }
139  // no break
140 
141  default:
142  if (is_alnum(current_)) {
 // Accumulate a run of alphanumeric characters, skipping any inlined
 // comments (254) that interrupt the run.
144  do {
145  token_.value += current_;
146  next_char_fast();
147  while (current_ == 254) {
148  skip_comment();
149  next_char_fast();
150  }
151  } while (is_alnum(current_));
152  } else {
 // Any other character becomes a one-character token value.
154  token_.value += current_;
155  next_char();
156  }
 // current_ has already been advanced past the token on this path, so
 // return directly instead of falling through to the next_char() below.
157  return token_;
158  }
159 
 // Step past the last character of the token unless input is exhausted.
160  if (current_ != EOF)
161  next_char();
162 
163  return token_;
164 }
165 
166 bool tokenizer::skip_command(char const *cmd)
167 {
168  for (; *cmd; ++cmd) {
169  next_char_fast();
170  if (current_ != *cmd) return false;
171  }
172  next_char_fast();
173  if (!is_space(current_)) return false;
174  next_char_fast();
175  return true;
176 }
177 
// NOTE(review): the signature line (listing line 178; the cross-reference
// index gives it as `void skip_comment()`) is missing from this extract.
//
// Skips the rest of a comment, stopping at (not consuming) the end of the
// line or the end of input. Two directives are recognised at the start of
// the comment: "textdomain <name>" stores the rest of the line in
// textdomain_, and "line <number> <file>" sets lineno_ and stores the rest
// of the line in file_.
179 {
180  next_char_fast();
 // Empty comment: nothing to skip.
181  if (current_ == '\n' || current_ == EOF) return;
 // Member that receives the directive's argument; set by the branches below.
182  std::string *dst = nullptr;
183 
184  if (current_ == 't')
185  {
 // The 't' is already consumed, so only the remainder is matched.
186  if (!skip_command("extdomain")) goto fail;
187  dst = &textdomain_;
188  }
189  else if (current_ == 'l')
190  {
191  if (!skip_command("ine")) goto fail;
 // Parse the decimal line number.
 // NOTE(review): lineno_ has already been overwritten if the directive
 // turns out to be malformed after the digits - confirm this is intended.
192  lineno_ = 0;
193  while (is_num(current_)) {
194  lineno_ = lineno_ * 10 + (current_ - '0');
195  next_char_fast();
196  }
 // A space must separate the number from the file name.
197  if (!is_space(current_)) goto fail;
198  next_char_fast();
199  dst = &file_;
200  }
201  else
202  {
 // Ordinary comment or malformed directive: discard the rest of the
 // line. The label sits inside this block so the directive branches
 // above can jump to it.
203  fail:
204  while (current_ != '\n' && current_ != EOF) {
205  next_char_fast();
206  }
207  return;
208  }
209 
 // Replace the selected member with the remainder of the line
 // (the line end itself is not included).
210  dst->clear();
211  while (current_ != '\n' && current_ != EOF) {
212  *dst += current_;
213  next_char_fast();
214  }
215 }
void skip_comment()
Definition: tokenizer.cpp:178
void next_char()
Definition: tokenizer.hpp:99
int startlineno_
Definition: tokenizer.hpp:97
const GLfloat * c
Definition: glew.h:12741
void next_char_fast()
Definition: tokenizer.hpp:106
GLdouble GLdouble t
Definition: glew.h:1366
GLuint in
Definition: glew.h:9261
const token & next_token()
Definition: tokenizer.cpp:55
token_type
Definition: tokenizer.hpp:35
GLenum GLenum dst
Definition: glew.h:2392
bool is_alnum(int c) const
Definition: tokenizer.hpp:158
std::string file_
Definition: tokenizer.hpp:172
bool is_space(int c) const
Definition: tokenizer.hpp:148
int lineno_
Definition: tokenizer.hpp:96
#define PACKAGE
Definition: wesconfig.h:23
Some defines: VERSION, PACKAGE, MIN_SAVEGAME_VERSION.
bool is_num(int c) const
Definition: tokenizer.hpp:153
char char_types_[128]
Definition: tokenizer.hpp:178
int peek_char()
Definition: tokenizer.hpp:130
std::string textdomain_
Definition: tokenizer.hpp:171
int current_
Definition: tokenizer.hpp:95
#define c
Definition: glew.h:12743
buffered_istream in_
Definition: tokenizer.hpp:177
std::istream & stream()
Returns the owned stream.
token token_
Definition: tokenizer.hpp:173
bool skip_command(char const *cmd)
Returns true if the next characters are those from cmd, followed by a space.
Definition: tokenizer.cpp:166
GLsizei const GLcharARB ** string
Definition: glew.h:4503
std::string value
Definition: tokenizer.hpp:54
token_type type
Definition: tokenizer.hpp:53