MediaWiki
REL1_19
|
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * UTF-8 byte layouts used below (original 31-bit scheme, 1 to 6 bytes):
 *
 *   Code point range            UTF-8 bytes
 *   0x00000000 - 0x0000007F:    0xxxxxxx
 *   0x00000080 - 0x000007FF:    110xxxxx 10xxxxxx
 *   0x00000800 - 0x0000FFFF:    1110xxxx 10xxxxxx 10xxxxxx
 *   0x00010000 - 0x001FFFFF:    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   0x00200000 - 0x03FFFFFF:    111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   0x04000000 - 0x7FFFFFFF:    1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Binary <-> hex digit table:
 *
 *   0000 0    1001 9
 *   0001 1    1010 A
 *   0010 2    1011 B
 *   0011 3    1100 C
 *   0100 4    1101 D
 *   0101 5    1110 E
 *   0110 6    1111 F
 *   0111 7
 *   1000 8
 */

/*
 * Encode the code point u into out[] following the table above.
 *
 * out must have room for 6 bytes. Returns the number of bytes written
 * (1..6), or 0 when u is negative or larger than 0x7FFFFFFF and therefore
 * has no encoding in this scheme.
 */
static size_t encodeUTF8(long long u, unsigned char out[6]) {
    size_t len;
    unsigned char lead;

    if (u < 0 || u > 0x7FFFFFFFLL) {
        return 0;
    }
    if (u < 0x80) {
        out[0] = (unsigned char)u;
        return 1;
    }

    if (u < 0x800) {
        len = 2;
        lead = 0xC0;
    } else if (u < 0x10000) {
        len = 3;
        lead = 0xE0;
    } else if (u < 0x200000) {
        len = 4;
        lead = 0xF0;
    } else if (u < 0x4000000) {
        len = 5;
        lead = 0xF8;
    } else {
        len = 6;
        lead = 0xFC;
    }

    /*
     * Fill the continuation bytes from last to first, consuming six payload
     * bits each time. Deriving every byte from the same shift avoids the
     * per-byte hand-written masks (one of which used to be wrong for the
     * 5-byte form).
     */
    for (size_t i = len - 1; i > 0; i--) {
        out[i] = (unsigned char)(0x80 | (u & 0x3F));
        u >>= 6;
    }
    /* Whatever is left fits in the payload bits of the lead byte. */
    out[0] = (unsigned char)(lead | u);
    return len;
}

/*
 * Write the UTF-8 encoding of code point u to stdout.
 *
 * Values outside 0 .. 0x7FFFFFFF cannot be represented and produce no output.
 */
void printUTF8(long long u) {
    unsigned char bytes[6];
    size_t len = encodeUTF8(u, bytes);

    if (len > 0) {
        fwrite(bytes, 1, len, stdout);
    }
}

/*
 * Filter: read lines from stdin and, for every "U+<hex>" found on a line,
 * print "U+<hex, at least 5 digits>" followed by the UTF-8 bytes of that
 * code point and a '|'. One output line is written per fgets() read.
 */
int main(void) {
    char buf[1024];

    while (fgets(buf, sizeof buf, stdin)) {
        size_t len = strlen(buf);

        for (size_t i = 0; i + 1 < len; i++) {
            const char *digits;
            char *end;
            long long n1;

            if (buf[i] != 'U' || buf[i + 1] != '+') {
                continue;
            }

            digits = buf + i + 2;
            n1 = strtoll(digits, &end, 16);
            if (end == digits) {
                /* "U+" with no hex number after it: nothing to convert. */
                continue;
            }

            /* %llx takes an unsigned long long; n1 is long long. */
            printf("U+%05llx", (unsigned long long)n1);
            printUTF8(n1);
            printf("|");
        }
        printf("\n");
    }
    return 0;
}