00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "csv.h"
00011 #include "csv_local.h"
00012 #include "csv_extern.h"
00013
00014 typedef enum { GL_OK, GL_EOF, GL_FAIL } getline_status;
00015
00016 static int input_field_count(const char *, size_t, u_int32_t *);
00017 static getline_status
00018 input_getline(char **, size_t *, size_t *);
00019 static int input_put_alloc(u_int32_t **, size_t *, size_t, u_int32_t);
00020 static int input_set_offset(u_int32_t *, char *, size_t, u_int32_t);
00021
00022 static input_fmt ifmt;
00023 static u_long record_count = 0;
00024 static u_long version;
00025
00026
00027
00028
00029
00030 int
00031 input_load(input_fmt ifmt_arg, u_long version_arg)
00032 {
00033 getline_status gtl_status;
00034 DBT key, data;
00035 DBC *cursor;
00036 u_int32_t field_count, primary_key, *put_line;
00037 size_t input_len, len, put_len;
00038 int is_first, ret;
00039 char *input_line;
00040
00041 field_count = 0;
00042
00043
00044 ifmt = ifmt_arg;
00045 version = version_arg;
00046
00047
00048
00049
00050
00051
00052 if ((ret = db->cursor(db, NULL, &cursor, 0)) != 0) {
00053 dbenv->err(dbenv, ret, "DB->cursor");
00054 return (1);
00055 }
00056 memset(&key, 0, sizeof(key));
00057 memset(&data, 0, sizeof(data));
00058 if ((ret = cursor->c_get(cursor, &key, &data, DB_LAST)) != 0)
00059 if (ret == DB_NOTFOUND)
00060 primary_key = 0;
00061 else {
00062 dbenv->err(dbenv, ret, "DB->cursor: DB_LAST");
00063 return (1);
00064 }
00065 else
00066 memcpy(&primary_key, key.data, sizeof(primary_key));
00067 if ((ret = cursor->c_close(cursor)) != 0) {
00068 dbenv->err(dbenv, ret, "DBC->close");
00069 return (1);
00070 }
00071 if (verbose)
00072 dbenv->errx(dbenv,
00073 "maximum existing record in the database is %lu",
00074 (u_long)primary_key);
00075
00076 key.data = &primary_key;
00077 key.size = sizeof(primary_key);
00078 input_line = NULL;
00079 put_line = NULL;
00080 input_len = put_len = 0;
00081
00082
00083
00084
00085 for (is_first = 1; (gtl_status =
00086 input_getline(&input_line, &input_len, &len)) == GL_OK;) {
00087 ++record_count;
00088 if (verbose > 1)
00089 dbenv->errx(dbenv, "reading %lu", (u_long)record_count);
00090
00091
00092 if (is_first) {
00093 is_first = 0;
00094
00095
00096 if (input_field_count(
00097 input_line, len, &field_count) != 0)
00098 return (1);
00099
00100 }
00101
00102
00103 if (input_put_alloc(
00104 &put_line, &put_len, len, field_count) != 0)
00105 return (1);
00106
00107
00108
00109
00110
00111 if (input_set_offset(put_line,
00112 input_line, len, field_count) != 0)
00113 return (1);
00114
00115 ++primary_key;
00116
00117 memcpy(put_line + (field_count + 2), input_line, len);
00118 data.data = put_line;
00119 data.size = (field_count + 2) * sizeof(u_int32_t) + len;
00120
00121 if (verbose > 1)
00122 (void)entry_print(
00123 data.data, data.size, field_count);
00124
00125
00126 if ((ret = db->put(db, NULL, &key, &data, 0)) != 0) {
00127 dbenv->err(dbenv, ret,
00128 "DB->put: %lu", (u_long)primary_key);
00129 return (1);
00130 }
00131 }
00132
00133 if (gtl_status != GL_EOF)
00134 return (1);
00135
00136 if (verbose)
00137 dbenv->errx(dbenv,
00138 "%lu records read from the input file into the database",
00139 record_count);
00140
00141
00142
00143
00144 if ((ret = db->sync(db, 0)) != 0) {
00145 dbenv->err(dbenv, ret, "DB->sync");
00146 return (1);
00147 }
00148
00149 return (0);
00150 }
00151
00152
00153
00154
00155
00156 static getline_status
00157 input_getline(char **input_linep, size_t *input_lenp, size_t *lenp)
00158 {
00159 size_t input_len, len;
00160 int ch;
00161 char *input_line, *p, *endp;
00162
00163 input_line = *input_linep;
00164 input_len = *input_lenp;
00165
00166 p = input_line;
00167 endp = input_line + input_len;
00168
00169 for (len = 0; (ch = getchar()) != EOF;) {
00170 if (ch == '\0')
00171 continue;
00172 switch (ifmt) {
00173 case FORMAT_NL:
00174 if (ch == '\n')
00175 goto end;
00176 break;
00177 case FORMAT_EXCEL:
00178
00179 if (ch == '\n')
00180 continue;
00181
00182
00183
00184
00185 if (ch == '\015') {
00186 if (len == 0)
00187 continue;
00188 goto end;
00189 }
00190 }
00191 if (input_line == endp) {
00192 input_len += 256;
00193 input_len *= 2;
00194 if ((input_line =
00195 realloc(input_line, input_len)) == NULL) {
00196 dbenv->err(dbenv, errno,
00197 "unable to allocate %lu bytes for record",
00198 (u_long)input_len);
00199 return (GL_FAIL);
00200 }
00201 p = input_line;
00202 endp = p + input_len;
00203 }
00204
00205 if (isprint(ch)) {
00206 *p++ = (char)ch;
00207 ++len;
00208 }
00209 }
00210
00211 end: if (len == 0)
00212 return (GL_EOF);
00213
00214 *lenp = len;
00215 *input_linep = input_line;
00216 *input_lenp = input_len;
00217
00218 return (GL_OK);
00219 }
00220
00221
00222
00223
00224
00225 static int
00226 input_field_count(const char *line, size_t len, u_int32_t *field_countp)
00227 {
00228 u_int32_t field_count;
00229 int quoted;
00230
00231 field_count = 1;
00232
00233
00234
00235
00236
00237 switch (ifmt) {
00238 case FORMAT_EXCEL:
00239 quoted = 0;
00240 for (field_count = 1; len > 0; ++line, --len)
00241 if (*line == '"')
00242 quoted = !quoted;
00243 else if (*line == ',' && !quoted)
00244 ++field_count;
00245 break;
00246 case FORMAT_NL:
00247 for (field_count = 1; len > 0; ++line, --len)
00248 if (*line == ',')
00249 ++field_count;
00250 break;
00251 }
00252 *field_countp = field_count;
00253
00254 if (verbose)
00255 dbenv->errx(dbenv,
00256 "input file made up of %lu fields", (u_int)field_count);
00257
00258 return (0);
00259 }
00260
00261
00262
00263
00264
00265 static int
00266 input_put_alloc(u_int32_t **put_linep,
00267 size_t *put_lenp, size_t len, u_int32_t field_count)
00268 {
00269 size_t total;
00270
00271 total = (field_count + 2) * sizeof(u_int32_t) + len;
00272 if (total > *put_lenp &&
00273 (*put_linep = realloc(*put_linep, *put_lenp += total)) == NULL) {
00274 dbenv->err(dbenv, errno,
00275 "unable to allocate %lu bytes for record",
00276 (u_long)*put_lenp);
00277 return (1);
00278 }
00279 return (0);
00280 }
00281
00282
00283
00284
00285
00286 static int
00287 input_set_offset(u_int32_t *put_line,
00288 char *input_line, size_t len, u_int32_t field_count)
00289 {
00290 u_int32_t *op;
00291 int quoted;
00292 char *p, *endp;
00293
00294 op = put_line;
00295
00296
00297 *op++ = version;
00298
00299
00300
00301
00302
00303 *op++ = 0;
00304 quoted = 0;
00305 for (p = input_line, endp = input_line + len;; ++p) {
00306 if (ifmt == FORMAT_EXCEL && p < endp) {
00307 if (*p == '"')
00308 quoted = !quoted;
00309 if (quoted)
00310 continue;
00311 }
00312 if (*p == ',' || p == endp) {
00313 if (field_count == 0) {
00314 dbenv->errx(dbenv,
00315 "record %lu: too many fields in the record",
00316 record_count);
00317 return (1);
00318 }
00319 --field_count;
00320
00321 *op++ = (u_int32_t)(p - input_line) + 1;
00322
00323 if (verbose > 1)
00324 dbenv->errx(dbenv,
00325 "offset %lu: {%.*s}", op[-1],
00326 OFFSET_LEN(op, -2), input_line + op[-2]);
00327
00328
00329
00330
00331
00332 if (p == endp || p + 1 == endp)
00333 break;
00334 }
00335 }
00336 *op++ = (u_int32_t)(p - input_line);
00337
00338 if (field_count != 0) {
00339 dbenv->errx(dbenv,
00340 "record %lu: not enough fields in the record",
00341 record_count);
00342 return (1);
00343 }
00344 memcpy(op, input_line, len);
00345
00346 return (0);
00347 }