Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "postgres_fe.h"
00014 #include "mbprint.h"
00015 #ifndef PGSCRIPTS
00016 #include "settings.h"
00017 #endif
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
/* Integer type used in this file to hold one decoded character value. */
typedef unsigned int pg_wchar;

/*
 * Return the encoding id that pg_char_to_encoding() reports for "utf8".
 *
 * The id is remembered in a function-local static after the first lookup.
 * While the remembered value is negative the lookup is simply repeated on
 * the next call.
 */
static int
pg_get_utf8_id(void)
{
	static int	cached_id = -1;

	if (cached_id >= 0)
		return cached_id;

	cached_id = pg_char_to_encoding("utf8");
	return cached_id;
}

/* Shorthand so the rest of the file can compare an encoding against UTF-8. */
#define PG_UTF8		pg_get_utf8_id()
00043
00044
00045
00046
00047
00048
00049
00050
00051 static pg_wchar
00052 utf8_to_unicode(const unsigned char *c)
00053 {
00054 if ((*c & 0x80) == 0)
00055 return (pg_wchar) c[0];
00056 else if ((*c & 0xe0) == 0xc0)
00057 return (pg_wchar) (((c[0] & 0x1f) << 6) |
00058 (c[1] & 0x3f));
00059 else if ((*c & 0xf0) == 0xe0)
00060 return (pg_wchar) (((c[0] & 0x0f) << 12) |
00061 ((c[1] & 0x3f) << 6) |
00062 (c[2] & 0x3f));
00063 else if ((*c & 0xf8) == 0xf0)
00064 return (pg_wchar) (((c[0] & 0x07) << 18) |
00065 ((c[1] & 0x3f) << 12) |
00066 ((c[2] & 0x3f) << 6) |
00067 (c[3] & 0x3f));
00068 else
00069
00070 return 0xffffffff;
00071 }
00072
00073
00074
00075
00076
00077
00078
00079
/*
 * Check whether the bytes at c start one acceptable UTF-8 character.
 *
 * Returns the length of the character in bytes (1 to 4), or -1 if the
 * sequence is not acceptable.  Rejected are:
 *	- lead bytes that are not a 1..4 byte UTF-8 lead (including stray
 *	  continuation bytes),
 *	- missing or malformed continuation bytes,
 *	- overlong encodings,
 *	- surrogate code points U+D800..U+DFFF,
 *	- noncharacters U+FDD0..U+FDEF and U+nFFFE / U+nFFFF,
 *	- code points above U+10FFFF.
 *
 * c must point into a NUL-terminated string.  Continuation bytes are tested
 * left to right with short-circuit evaluation, so a truncated sequence stops
 * at the terminating NUL instead of reading beyond it.
 */
static int
utf_charcheck(const unsigned char *c)
{
	if ((*c & 0x80) == 0)
		return 1;
	else if ((*c & 0xe0) == 0xc0)
	{
		/*
		 * Two-byte character.  Payload bits 0 or 1 in the lead byte would
		 * encode a value below 0x80, i.e. an overlong form.
		 */
		if (((c[1] & 0xc0) == 0x80) && ((c[0] & 0x1f) > 0x01))
			return 2;
		return -1;
	}
	else if ((*c & 0xf0) == 0xe0)
	{
		/*
		 * Three-byte character.  The middle condition rejects overlong
		 * forms (values below 0x800).
		 */
		if (((c[1] & 0xc0) == 0x80) &&
			(((c[0] & 0x0f) != 0x00) || ((c[1] & 0x20) == 0x20)) &&
			((c[2] & 0xc0) == 0x80))
		{
			/*
			 * Decode the full code point; the low six bits come from the
			 * THIRD byte.  Testing the decoded value directly keeps the
			 * range checks readable and exact.
			 */
			unsigned int cp = ((unsigned int) (c[0] & 0x0f) << 12) |
				((unsigned int) (c[1] & 0x3f) << 6) |
				(unsigned int) (c[2] & 0x3f);

			if ((cp >= 0xd800 && cp <= 0xdfff) ||	/* surrogates */
				(cp >= 0xfdd0 && cp <= 0xfdef) ||	/* noncharacters */
				(cp >= 0xfffe))						/* U+FFFE, U+FFFF */
				return -1;
			return 3;
		}
		return -1;
	}
	else if ((*c & 0xf8) == 0xf0)
	{
		/* Plane number: upper five bits of the 21-bit code point */
		int			u = ((c[0] & 0x07) << 2) | ((c[1] & 0x30) >> 4);

		/*
		 * Four-byte character.  u == 0 would be an overlong form and
		 * u > 0x10 would lie above U+10FFFF.
		 */
		if (((c[1] & 0xc0) == 0x80) &&
			(u > 0x00) && (u <= 0x10) &&
			((c[2] & 0xc0) == 0x80) && ((c[3] & 0xc0) == 0x80))
		{
			/* U+nFFFE and U+nFFFF are noncharacters in every plane */
			if (((c[1] & 0x0f) == 0x0f) && ((c[2] & 0x3f) == 0x3f) &&
				((c[3] & 0x3e) == 0x3e))
				return -1;
			return 4;
		}
		return -1;
	}
	return -1;
}
00132
00133
/*
 * Strip unacceptable UTF-8 bytes from a NUL-terminated string, in place.
 *
 * Every character that utf_charcheck() accepts is kept; every byte at which
 * utf_charcheck() fails is dropped one byte at a time.  Kept characters are
 * shifted down over the dropped bytes.  The buffer is only written to once
 * something has actually been dropped.
 */
static void
mb_utf_validate(unsigned char *pwcs)
{
	unsigned char *dst = pwcs;	/* where the next kept byte goes */
	unsigned char *src = pwcs;	/* next byte to examine */

	while (*src != '\0')
	{
		int			nbytes = utf_charcheck(src);

		if (nbytes <= 0)
		{
			/* unacceptable byte: skip it, leaving a gap behind dst */
			src++;
			continue;
		}

		if (dst == src)
		{
			/* nothing dropped so far; the character is already in place */
			dst += nbytes;
			src += nbytes;
			continue;
		}

		/* close the gap by moving the character down */
		while (nbytes-- > 0)
			*dst++ = *src++;
	}

	/* re-terminate only if the string got shorter */
	if (dst != src)
		*dst = '\0';
}
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
/*
 * Sum the display widths of the characters in the first len bytes of pwcs.
 *
 * Character lengths come from PQmblen() and display widths from PQdsplen().
 * Characters whose reported width is zero or negative add nothing.  Scanning
 * stops early if the next character would extend past the len bytes given.
 */
int
pg_wcswidth(const char *pwcs, size_t len, int encoding)
{
	int			total = 0;

	for (;;)
	{
		int			nbytes;
		int			cells;

		if (len == 0)
			break;

		nbytes = PQmblen(pwcs, encoding);
		if ((size_t) nbytes > len)
			break;				/* character runs past the end of input */

		cells = PQdsplen(pwcs, encoding);
		total += (cells > 0) ? cells : 0;

		pwcs += nbytes;
		len -= nbytes;
	}

	return total;
}
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
/*
 * Measure a string the way pg_wcsformat() in this file will lay it out.
 *
 * Scans at most len bytes of pwcs (stopping at a NUL byte or at a character
 * that would run past len) and reports, through whichever output pointers
 * are non-NULL:
 *	result_width		- widest line, in display columns
 *	result_height		- number of lines (1 + number of newlines seen)
 *	result_format_size	- bytes needed for the formatted text, including
 *						  one terminator per line
 *
 * Accounting per character:
 *	newline					ends the line; 1 byte
 *	carriage return			2 columns, 2 bytes
 *	tab						columns/bytes up to the next multiple of 8
 *	other single byte		4 columns/bytes if PQdsplen() < 0, else its
 *							width and 1 byte
 *	multibyte				6 columns/bytes if PQdsplen() < 0, else its
 *							width and its byte length
 */
void
pg_wcssize(const unsigned char *pwcs, size_t len, int encoding,
		   int *result_width, int *result_height, int *result_format_size)
{
	int			widest = 0;			/* widest completed line so far */
	int			current = 0;		/* width of the line being scanned */
	int			rows = 1;
	int			bytes_needed = 0;

	while (*pwcs && len > 0)
	{
		int			nbytes = PQmblen((const char *) pwcs, encoding);
		int			cells;

		if (len < (size_t) nbytes)
			break;
		cells = PQdsplen((const char *) pwcs, encoding);

		if (nbytes != 1)
		{
			/* multibyte character */
			if (cells < 0)
			{
				current += 6;
				bytes_needed += 6;
			}
			else
			{
				current += cells;
				bytes_needed += nbytes;
			}
		}
		else
		{
			switch (*pwcs)
			{
				case '\n':
					if (current > widest)
						widest = current;
					current = 0;
					rows++;
					bytes_needed++;
					break;

				case '\r':
					current += 2;
					bytes_needed += 2;
					break;

				case '\t':
					/* always advances at least one column */
					do
					{
						current++;
						bytes_needed++;
					} while (current % 8 != 0);
					break;

				default:
					if (cells < 0)
					{
						current += 4;
						bytes_needed += 4;
					}
					else
					{
						current += cells;
						bytes_needed++;
					}
					break;
			}
		}

		len -= nbytes;
		pwcs += nbytes;
	}

	/* account for the final (unterminated) line */
	if (current > widest)
		widest = current;
	bytes_needed++;				/* terminator of the last line */

	if (result_width)
		*result_width = widest;
	if (result_height)
		*result_height = rows;
	if (result_format_size)
		*result_format_size = bytes_needed;
}
00285
00286
00287
00288
00289
00290
00291
00292 void
00293 pg_wcsformat(const unsigned char *pwcs, size_t len, int encoding,
00294 struct lineptr * lines, int count)
00295 {
00296 int w,
00297 chlen = 0;
00298 int linewidth = 0;
00299 unsigned char *ptr = lines->ptr;
00300
00301 for (; *pwcs && len > 0; pwcs += chlen)
00302 {
00303 chlen = PQmblen((const char *) pwcs, encoding);
00304 if (len < (size_t) chlen)
00305 break;
00306 w = PQdsplen((const char *) pwcs, encoding);
00307
00308 if (chlen == 1)
00309 {
00310 if (*pwcs == '\n')
00311 {
00312 *ptr++ = '\0';
00313 lines->width = linewidth;
00314 linewidth = 0;
00315 lines++;
00316 count--;
00317 if (count <= 0)
00318 exit(1);
00319
00320
00321 lines->ptr = ptr;
00322 }
00323 else if (*pwcs == '\r')
00324 {
00325 strcpy((char *) ptr, "\\r");
00326 linewidth += 2;
00327 ptr += 2;
00328 }
00329 else if (*pwcs == '\t')
00330 {
00331 do
00332 {
00333 *ptr++ = ' ';
00334 linewidth++;
00335 } while (linewidth % 8 != 0);
00336 }
00337 else if (w < 0)
00338 {
00339 sprintf((char *) ptr, "\\x%02X", *pwcs);
00340 linewidth += 4;
00341 ptr += 4;
00342 }
00343 else
00344 {
00345 linewidth += w;
00346 *ptr++ = *pwcs;
00347 }
00348 }
00349 else if (w < 0)
00350 {
00351 if (encoding == PG_UTF8)
00352 sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs));
00353 else
00354 {
00355
00356
00357
00358
00359
00360 sprintf((char *) ptr, "\\u????");
00361 }
00362 ptr += 6;
00363 linewidth += 6;
00364 }
00365 else
00366 {
00367 int i;
00368
00369 for (i = 0; i < chlen; i++)
00370 *ptr++ = pwcs[i];
00371 linewidth += w;
00372 }
00373 len -= chlen;
00374 }
00375 lines->width = linewidth;
00376 *ptr++ = '\0';
00377
00378 if (count <= 0)
00379 exit(1);
00380
00381 (lines + 1)->ptr = NULL;
00382 }
00383
00384 unsigned char *
00385 mbvalidate(unsigned char *pwcs, int encoding)
00386 {
00387 if (encoding == PG_UTF8)
00388 mb_utf_validate(pwcs);
00389 else
00390 {
00391
00392
00393
00394
00395 }
00396
00397 return pwcs;
00398 }