00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <config.h>
00022
00023 #include "api_unicode.h"
00024
00025 #include <xapian.h>
00026
00027 #include "apitest.h"
00028 #include "testutils.h"
00029
00030 #include <cctype>
00031
00032 using namespace std;
00033
// A pair of NUL-terminated byte strings.  The utf8iterator1 test walks both
// through Xapian::Utf8Iterator and expects them to yield the same values.
struct testcase {
    const char * a;
    const char * b;
};
00037
00038 static const testcase testcases[] = {
00039 { "abcd", "abcd" },
00040 { "a\x80""bcd", "a\xc2\x80""bcd" },
00041 { "a\xa0", "a\xc2\xa0" },
00042 { 0, 0 }
00043 };
00044
00045
00046 DEFINE_TESTCASE(utf8iterator1,!backend) {
00047 const testcase * p;
00048 for (p = testcases; p->a; ++p) {
00049 tout << '"' << p->a << "\" and \"" << p->b << '"' << endl;
00050 size_t a_len = strlen(p->a);
00051 Xapian::Utf8Iterator a(p->a, a_len);
00052
00053 size_t b_len = strlen(p->b);
00054 Xapian::Utf8Iterator b(p->b, b_len);
00055
00056 while (a != Xapian::Utf8Iterator() && b != Xapian::Utf8Iterator()) {
00057 TEST_EQUAL(*a, *b);
00058 ++a;
00059 ++b;
00060 }
00061
00062
00063 TEST(a == Xapian::Utf8Iterator());
00064 TEST(b == Xapian::Utf8Iterator());
00065 }
00066 return true;
00067 }
00068
// A NUL-terminated byte string together with the single value which the
// utf8iterator2 test expects Xapian::Utf8Iterator to produce from it.
struct testcase2 {
    // Bytes to feed to the iterator.
    const char * a;
    // Expected value of the first (and only) dereference.
    unsigned long n;
};
00073
00074 static const testcase2 testcases2[] = {
00075 { "a", 97 },
00076 { "\x80", 128 },
00077 { "\xa0", 160 },
00078 { "\xc2\x80", 128 },
00079 { "\xc2\xa0", 160 },
00080 { "\xf0\xa8\xa8\x8f", 166415 },
00081 { 0, 0 }
00082 };
00083
00084
00085 DEFINE_TESTCASE(utf8iterator2,!backend) {
00086 const testcase2 * p;
00087 for (p = testcases2; p->a; ++p) {
00088 Xapian::Utf8Iterator a(p->a, strlen(p->a));
00089
00090 TEST(a != Xapian::Utf8Iterator());
00091 TEST_EQUAL(*a, p->n);
00092 TEST(++a == Xapian::Utf8Iterator());
00093 }
00094 return true;
00095 }
00096
00097
00098 DEFINE_TESTCASE(unicode1,!backend) {
00099 using namespace Xapian;
00100 TEST_EQUAL(Unicode::get_category('a'), Unicode::LOWERCASE_LETTER);
00101 TEST_EQUAL(Unicode::get_category('0'), Unicode::DECIMAL_DIGIT_NUMBER);
00102 TEST_EQUAL(Unicode::get_category('$'), Unicode::CURRENCY_SYMBOL);
00103 TEST_EQUAL(Unicode::get_category(0xa3), Unicode::CURRENCY_SYMBOL);
00104
00105 TEST_EQUAL(Unicode::get_category(0x242), Unicode::LOWERCASE_LETTER);
00106 TEST_EQUAL(Unicode::get_category(0xFFFF), Unicode::UNASSIGNED);
00107
00108 TEST_EQUAL(Unicode::get_category(0x10345), Unicode::OTHER_LETTER);
00109 TEST_EQUAL(Unicode::get_category(0x10FFFD), Unicode::PRIVATE_USE);
00110 TEST_EQUAL(Unicode::get_category(0x10FFFF), Unicode::UNASSIGNED);
00111
00112 TEST_EQUAL(Unicode::get_category(0x110000), Unicode::UNASSIGNED);
00113 TEST_EQUAL(Unicode::get_category(0xFFFFFFFF), Unicode::UNASSIGNED);
00114 return true;
00115 }
00116
00117 DEFINE_TESTCASE(caseconvert1,!backend) {
00118 using namespace Xapian;
00119 for (unsigned ch = 0; ch < 128; ++ch) {
00120 if (isupper((char)ch)) {
00121 TEST_EQUAL(Unicode::tolower(ch), unsigned(tolower((char)ch)));
00122 } else {
00123 TEST_EQUAL(Unicode::tolower(ch), ch);
00124 }
00125 if (islower((char)ch)) {
00126 TEST_EQUAL(Unicode::toupper(ch), unsigned(toupper((char)ch)));
00127 } else {
00128 TEST_EQUAL(Unicode::toupper(ch), ch);
00129 }
00130 }
00131
00132
00133 TEST_EQUAL(Unicode::tolower(0x242), 0x242);
00134 TEST_EQUAL(Unicode::toupper(0x242), 0x241);
00135 TEST_EQUAL(Unicode::toupper(0x241), 0x241);
00136 TEST_EQUAL(Unicode::tolower(0x241), 0x242);
00137
00138
00139 TEST_EQUAL(Unicode::tolower(0xa3), 0xa3);
00140 TEST_EQUAL(Unicode::toupper(0xa3), 0xa3);
00141
00142 TEST_EQUAL(Unicode::tolower(0xFFFF), 0xFFFF);
00143 TEST_EQUAL(Unicode::toupper(0xFFFF), 0xFFFF);
00144
00145 TEST_EQUAL(Unicode::tolower(0x10345), 0x10345);
00146 TEST_EQUAL(Unicode::toupper(0x10345), 0x10345);
00147 TEST_EQUAL(Unicode::tolower(0x10FFFD), 0x10FFFD);
00148 TEST_EQUAL(Unicode::toupper(0x10FFFD), 0x10FFFD);
00149 TEST_EQUAL(Unicode::tolower(0x10FFFF), 0x10FFFF);
00150 TEST_EQUAL(Unicode::toupper(0x10FFFF), 0x10FFFF);
00151
00152 TEST_EQUAL(Unicode::tolower(0x110000), 0x110000);
00153 TEST_EQUAL(Unicode::toupper(0x110000), 0x110000);
00154 TEST_EQUAL(Unicode::tolower(0xFFFFFFFF), 0xFFFFFFFF);
00155 TEST_EQUAL(Unicode::toupper(0xFFFFFFFF), 0xFFFFFFFF);
00156
00157 return true;
00158 }