MediaWiki  REL1_19
printutf8.c
Go to the documentation of this file.
00001 #include <stdlib.h>
00002 #include <stdio.h>
00003 #include <string.h>
00004 /* 
00005  Unicode                   UTF8
00006 0x00000000 - 0x0000007F: 0xxxxxxx
00007 0x00000080 - 0x000007FF: 110xxx xx 10xx xxxx
00008 0x00000800 - 0x0000FFFF: 1110xxxx  10xxxx xx 10xx xxxx
00009 0x00010000 - 0x001FFFFF: 11110x xx 10xx xxxx 10xxxx xx 10xx xxxx
00010 0x00200000 - 0x03FFFFFF: 111110xx  10xxxx xx 10xx xxxx 10xxxx xx 10xx xxxx
00011 0x04000000 - 0x7FFFFFFF: 1111110x  10xx xxxx 10xxxx xx 10xx xxxx 10xxxx xx 10xx xxxx
00012 
00013 0000 0      1001 9
00014 0001 1      1010 A
00015 0010 2      1011 B
00016 0011 3      1100 C
00017 0100 4      1101 D 
00018 0101 5      1110 E
00019 0110 6      1111 F
00020 0111 7
00021 1000 8
00022 */
/*
 * Write the UTF-8 encoding of code point `u` to stdout.
 *
 * The original (pre-RFC 3629) scheme is used, so the output is one to six
 * bytes long:
 *
 *   u <  0x80       1 byte   0xxxxxxx
 *   u <  0x800      2 bytes  110xxxxx 10xxxxxx
 *   u <  0x10000    3 bytes  1110xxxx 10xxxxxx 10xxxxxx
 *   u <  0x200000   4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   u <  0x4000000  5 bytes  111110xx 10xxxxxx (x4)
 *   otherwise       6 bytes  1111110x 10xxxxxx (x5)
 *
 * No validation is performed: surrogates and values above 0x10FFFF are
 * encoded as-is. Any value below 0x80, including a negative one, is emitted
 * as its single low byte; values above 0x7FFFFFFF take the six-byte path
 * with the bits above bit 30 discarded.
 */
void printUTF8(long long u) {
  int trailing;   /* number of 10xxxxxx continuation bytes */
  int marker;     /* length prefix stored in the lead byte's high bits */
  int shift;

  if (u < 0x80) {
    putchar((unsigned char)u);
    return;
  }

  if (u < 0x800) {
    trailing = 1; marker = 0xc0;
  } else if (u < 0x10000) {
    trailing = 2; marker = 0xe0;
  } else if (u < 0x200000) {
    trailing = 3; marker = 0xf0;
  } else if (u < 0x4000000) {
    trailing = 4; marker = 0xf8;
  } else {
    trailing = 5; marker = 0xfc;
  }

  /*
   * The lead byte carries the top (6 - trailing) payload bits; each
   * continuation byte carries the next six. Deriving every byte from the
   * shift count keeps all branches consistent (the hand-written five-byte
   * branch previously used a wrong mask for its fourth byte).
   * u is non-negative here, so the right shifts are well defined.
   */
  shift = 6 * trailing;
  putchar(marker | (int)((u >> shift) & ((1 << (6 - trailing)) - 1)));
  for (shift -= 6; shift >= 0; shift -= 6) {
    putchar(0x80 | (int)((u >> shift) & 0x3f));
  }
}
00079 
/*
 * Filter: read stdin one fgets() chunk at a time (at most 1023 characters,
 * so longer lines are processed in pieces) and look for "U+<hex>" tokens.
 *
 * For each token found, print "U+" followed by the value as at least five
 * lowercase hex digits, then the UTF-8 encoding of that value, then '|'.
 * A newline is printed after each chunk, whether or not it held any token.
 * A "U+" that is not followed by something strtoll() accepts as a base-16
 * number is ignored.
 */
int main(void) {
  char buf[1024];

  while (fgets(buf, sizeof buf, stdin)) {
    const char *p;

    /* *p != '\0' guarantees p[1] is still inside the string (worst case
       it is the terminator), so the two-character lookahead is safe. */
    for (p = buf; *p != '\0'; p++) {
      char *end;
      long long cp;

      if (p[0] != 'U' || p[1] != '+') {
        continue;
      }
      cp = strtoll(p + 2, &end, 16);
      if (end == p + 2) {
        continue;  /* no digits consumed: not a code point reference */
      }
      /* %llx matches the long long argument; plain %x was a mismatch. */
      printf("U+%05llx", cp);
      printUTF8(cp);
      printf("|");
    }
    printf("\n");
  }
  return 0;
}
00099