| 1 | #include "unicode_config.h" |
| 2 | #include "unicode.h" |
| 3 | |
| 4 | #include <iostream> |
| 5 | #include <fstream> |
| 6 | #include <sstream> |
| 7 | #include <iomanip> |
| 8 | #include <algorithm> |
| 9 | #include <functional> |
| 10 | #include <cstdlib> |
| 11 | #include <list> |
| 12 | #include <vector> |
| 13 | |
| 14 | class collect_wordbreakflags : public mail::wordbreak_callback_base { |
| 15 | |
| 16 | public: |
| 17 | |
| 18 | std::vector<bool> flags; |
| 19 | |
| 20 | template<typename iter_type> void operator()(iter_type b, iter_type e) |
| 21 | { |
| 22 | mail::wordbreak_callback_base::operator()(b, e); |
| 23 | } |
| 24 | |
| 25 | using mail::wordbreak_callback_base::operator<<; |
| 26 | |
| 27 | private: |
| 28 | int operator()(bool flag) |
| 29 | { |
| 30 | flags.push_back(flag); |
| 31 | return 0; |
| 32 | } |
| 33 | }; |
| 34 | |
| 35 | static void testsuite() |
| 36 | { |
| 37 | std::string buf; |
| 38 | int linenum=0; |
| 39 | |
| 40 | std::ifstream fp("WordBreakTest.txt"); |
| 41 | |
| 42 | if (!fp.is_open()) |
| 43 | exit(1); |
| 44 | |
| 45 | while (1) |
| 46 | { |
| 47 | buf.clear(); |
| 48 | |
| 49 | if (std::getline(fp, buf).eof() && buf.empty()) |
| 50 | break; |
| 51 | |
| 52 | ++linenum; |
| 53 | |
| 54 | buf.erase(std::find(buf.begin(), buf.end(), '#'), buf.end()); |
| 55 | |
| 56 | if (buf.empty()) |
| 57 | continue; |
| 58 | |
| 59 | std::list<std::string> words; |
| 60 | |
| 61 | for (std::string::iterator b=buf.begin(), e=buf.end(); b != e;) |
| 62 | { |
| 63 | if (isspace(*b)) |
| 64 | { |
| 65 | ++b; |
| 66 | continue; |
| 67 | } |
| 68 | |
| 69 | std::string::iterator p=b; |
| 70 | |
| 71 | while (b != e) |
| 72 | { |
| 73 | if (isspace(*b)) |
| 74 | break; |
| 75 | ++b; |
| 76 | } |
| 77 | |
| 78 | words.push_back(std::string(p, b)); |
| 79 | } |
| 80 | |
| 81 | std::vector<unicode_char> ubuf; |
| 82 | std::vector<bool> status; |
| 83 | |
| 84 | while (1) |
| 85 | { |
| 86 | if (!words.empty() && words.front().size() > 1) |
| 87 | { |
| 88 | bool flag=false; |
| 89 | std::string s=words.front(); |
| 90 | |
| 91 | words.pop_front(); |
| 92 | |
| 93 | if ((unsigned char)s[0] == |
| 94 | (unsigned char)0xc3 && |
| 95 | (unsigned char)s[1] == (unsigned char)0xb7) |
| 96 | flag=true; |
| 97 | |
| 98 | if (words.empty()) |
| 99 | break; |
| 100 | |
| 101 | status.push_back(flag); |
| 102 | |
| 103 | std::istringstream i(words.front()); |
| 104 | |
| 105 | unicode_char uc; |
| 106 | |
| 107 | i >> std::hex >> uc; |
| 108 | |
| 109 | words.pop_front(); |
| 110 | |
| 111 | if (!i.fail()) |
| 112 | { |
| 113 | ubuf.push_back(uc); |
| 114 | continue; |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | std::cerr << "Parse error, line " << linenum |
| 119 | << ": " << buf << std::endl; |
| 120 | exit(1); |
| 121 | } |
| 122 | |
| 123 | if (linenum == 24) |
| 124 | { |
| 125 | linenum=24; |
| 126 | } |
| 127 | collect_wordbreakflags flags; |
| 128 | |
| 129 | flags(ubuf.begin(), ubuf.end()); |
| 130 | flags.finish(); |
| 131 | |
| 132 | if (status != flags.flags) |
| 133 | { |
| 134 | std::cerr << "Regression, line " << linenum |
| 135 | << ": " << buf << std::endl; |
| 136 | exit(1); |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | int main(int argc, char **argv) |
| 142 | { |
| 143 | testsuite(); |
| 144 | return 0; |
| 145 | } |