Commit | Line | Data |
---|---|---|
b0322a85 CE |
1 | /* |
2 | ** Copyright 2011 Double Precision, Inc. | |
3 | ** See COPYING for distribution information. | |
4 | ** | |
5 | */ | |
6 | ||
7 | #include "unicode_config.h" | |
8 | #include "unicode.h" | |
9 | #include <unistd.h> | |
10 | #include <stdint.h> | |
11 | #include <stdlib.h> | |
12 | ||
/*
** Grapheme cluster break class codes.
**
** unicode_grapheme_break() compares the result of unicode_tab_lookup()
** against these values when applying the GB3..GB10 rules.
** UNICODE_GRAPHEMEBREAK_ANY is the value passed as the last argument of
** the lookup -- presumably the class reported for characters that have
** no entry in the table (TODO confirm against unicode_tab_lookup()).
**
** NOTE(review): these are defined ahead of the graphemebreaktab.h include,
** presumably because the table data refers to them by name -- confirm
** before reordering or converting to an enum.
*/
#define UNICODE_GRAPHEMEBREAK_ANY 0x00
#define UNICODE_GRAPHEMEBREAK_CR 0x01
#define UNICODE_GRAPHEMEBREAK_LF 0x02
#define UNICODE_GRAPHEMEBREAK_Control 0x03
#define UNICODE_GRAPHEMEBREAK_Extend 0x04
#define UNICODE_GRAPHEMEBREAK_Prepend 0x05
#define UNICODE_GRAPHEMEBREAK_SpacingMark 0x06
/* Hangul syllable classes, used by rules GB6, GB7 and GB8. */
#define UNICODE_GRAPHEMEBREAK_L 0x07
#define UNICODE_GRAPHEMEBREAK_V 0x08
#define UNICODE_GRAPHEMEBREAK_T 0x09
#define UNICODE_GRAPHEMEBREAK_LV 0x0A
#define UNICODE_GRAPHEMEBREAK_LVT 0x0B
25 | ||
26 | #include "graphemebreaktab.h" | |
27 | ||
28 | int unicode_grapheme_break(unicode_char a, unicode_char b) | |
29 | { | |
30 | uint8_t ac=unicode_tab_lookup(a, unicode_indextab, | |
31 | sizeof(unicode_indextab)/sizeof(unicode_indextab[0]), | |
32 | unicode_rangetab, | |
33 | unicode_classtab, | |
34 | UNICODE_GRAPHEMEBREAK_ANY), | |
35 | bc=unicode_tab_lookup(b, unicode_indextab, | |
36 | sizeof(unicode_indextab)/sizeof(unicode_indextab[0]), | |
37 | unicode_rangetab, | |
38 | unicode_classtab, | |
39 | UNICODE_GRAPHEMEBREAK_ANY); | |
40 | ||
41 | /* GB1 and GB2 are implied */ | |
42 | ||
43 | if (ac == UNICODE_GRAPHEMEBREAK_CR && bc == UNICODE_GRAPHEMEBREAK_LF) | |
44 | return 0; /* GB3 */ | |
45 | ||
46 | ||
47 | switch (ac) { | |
48 | case UNICODE_GRAPHEMEBREAK_CR: | |
49 | case UNICODE_GRAPHEMEBREAK_LF: | |
50 | case UNICODE_GRAPHEMEBREAK_Control: | |
51 | return 1; /* GB4 */ | |
52 | default: | |
53 | break; | |
54 | } | |
55 | ||
56 | switch (bc) { | |
57 | case UNICODE_GRAPHEMEBREAK_CR: | |
58 | case UNICODE_GRAPHEMEBREAK_LF: | |
59 | case UNICODE_GRAPHEMEBREAK_Control: | |
60 | return 1; /* GB5 */ | |
61 | default: | |
62 | break; | |
63 | } | |
64 | ||
65 | if (ac == UNICODE_GRAPHEMEBREAK_L) | |
66 | switch (bc) { | |
67 | case UNICODE_GRAPHEMEBREAK_L: | |
68 | case UNICODE_GRAPHEMEBREAK_V: | |
69 | case UNICODE_GRAPHEMEBREAK_LV: | |
70 | case UNICODE_GRAPHEMEBREAK_LVT: | |
71 | return 0; /* GB6 */ | |
72 | } | |
73 | ||
74 | if ((ac == UNICODE_GRAPHEMEBREAK_LV || | |
75 | ac == UNICODE_GRAPHEMEBREAK_V) && | |
76 | (bc == UNICODE_GRAPHEMEBREAK_V || | |
77 | bc == UNICODE_GRAPHEMEBREAK_T)) | |
78 | return 0; /* GB7 */ | |
79 | ||
80 | if ((ac == UNICODE_GRAPHEMEBREAK_LVT || | |
81 | ac == UNICODE_GRAPHEMEBREAK_T) && | |
82 | bc == UNICODE_GRAPHEMEBREAK_T) | |
83 | return 0; /* GB8 */ | |
84 | ||
85 | if (bc == UNICODE_GRAPHEMEBREAK_Extend) | |
86 | return 0; /* GB9 */ | |
87 | ||
88 | if (bc == UNICODE_GRAPHEMEBREAK_SpacingMark) | |
89 | return 0; /* GB9a */ | |
90 | ||
91 | if (ac == UNICODE_GRAPHEMEBREAK_Prepend) | |
92 | return 0; /* GB9b */ | |
93 | ||
94 | return 1; /* GB10 */ | |
95 | } |