Commit | Line | Data |
---|---|---|
4ed46869 KH |
1 | ;;; characters.el --- set syntax and category for multibyte characters |
2 | ||
ba318903 | 3 | ;; Copyright (C) 1997, 2000-2014 Free Software Foundation, Inc. |
7976eda0 | 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
2fd125a3 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
8f924df7 | 8 | ;; Copyright (C) 2003 |
55bd52ea KH |
9 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
10 | ;; Registration Number H13PRO009 | |
4ed46869 KH |
11 | |
12 | ;; Keywords: multibyte character, character set, syntax, category | |
13 | ||
14 | ;; This file is part of GNU Emacs. | |
15 | ||
4936186e | 16 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 17 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
18 | ;; the Free Software Foundation, either version 3 of the License, or |
19 | ;; (at your option) any later version. | |
4ed46869 KH |
20 | |
21 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
22 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
24 | ;; GNU General Public License for more details. | |
25 | ||
26 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 27 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
28 | |
29 | ;;; Commentary: | |
30 | ||
60370d40 PJ |
31 | ;;; Code: |
32 | ||
4ed46869 KH |
33 | ;;; Predefined categories. |
34 | ||
35 | ;; For each character set. | |
36 | ||
46bf60bc KH |
37 | (define-category ?a "ASCII |
38 | ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])") | |
4ed46869 KH |
39 | (define-category ?l "Latin") |
40 | (define-category ?t "Thai") | |
41 | (define-category ?g "Greek") | |
42 | (define-category ?b "Arabic") | |
43 | (define-category ?w "Hebrew") | |
44 | (define-category ?y "Cyrillic") | |
46bf60bc KH |
45 | (define-category ?k "Katakana |
46 | Japanese katakana") | |
47 | (define-category ?r "Roman | |
48 | Japanese roman") | |
4ed46869 KH |
49 | (define-category ?c "Chinese") |
50 | (define-category ?j "Japanese") | |
51 | (define-category ?h "Korean") | |
46bf60bc KH |
52 | (define-category ?e "Ethiopic |
53 | Ethiopic (Ge'ez)") | |
54 | (define-category ?v "Viet | |
55 | Vietnamese") | |
4ed46869 | 56 | (define-category ?i "Indian") |
6eba8645 | 57 | (define-category ?o "Lao") |
9395eb7c | 58 | (define-category ?q "Tibetan") |
4ed46869 KH |
59 | |
60 | ;; For each group (row) of 2-byte character sets. | |
61 | ||
46bf60bc KH |
62 | (define-category ?A "2-byte alnum |
63 | Alpha-numeric characters of 2-byte character sets") | |
64 | (define-category ?C "2-byte han | |
65 | Chinese (Han) characters of 2-byte character sets") | |
66 | (define-category ?G "2-byte Greek | |
67 | Greek characters of 2-byte character sets") | |
68 | (define-category ?H "2-byte Hiragana | |
69 | Japanese Hiragana characters of 2-byte character sets") | |
70 | (define-category ?K "2-byte Katakana | |
71 | Japanese Katakana characters of 2-byte character sets") | |
72 | (define-category ?N "2-byte Korean | |
73 | Korean Hangul characters of 2-byte character sets") | |
91c491e0 | 74 | (define-category ?Y "2-byte Cyrillic |
46bf60bc | 75 | Cyrillic characters of 2-byte character sets") |
4ed46869 KH |
76 | (define-category ?I "Indian Glyphs") |
77 | ||
78 | ;; For phonetic classifications. | |
79 | ||
80 | (define-category ?0 "consonant") | |
46bf60bc | 81 | (define-category ?1 "base vowel |
4eb97232 | 82 | Base (independent) vowel") |
46bf60bc | 83 | (define-category ?2 "upper diacritic |
4eb97232 | 84 | Upper diacritical mark (including upper vowel)") |
46bf60bc | 85 | (define-category ?3 "lower diacritic |
4eb97232 | 86 | Lower diacritical mark (including lower vowel)") |
46bf60bc | 87 | (define-category ?4 "combining tone |
4eb97232 | 88 | Combining tone mark") |
9765a2ba | 89 | (define-category ?5 "symbol") |
4ed46869 | 90 | (define-category ?6 "digit") |
91c491e0 | 91 | (define-category ?7 "vowel diacritic |
4eb97232 | 92 | Vowel-modifying diacritical mark") |
6eba8645 KH |
93 | (define-category ?8 "vowel-signs") |
94 | (define-category ?9 "semivowel lower") | |
4ed46869 KH |
95 | |
96 | ;; For filling. | |
46bf60bc KH |
97 | (define-category ?| "line breakable |
98 | While filling, we can break a line at this character.") | |
4ed46869 | 99 | |
504af7b2 | 100 | ;; For indentation calculation. |
70ea295a | 101 | (define-category ?\s |
46bf60bc KH |
102 | "space for indent |
103 | This character counts as a space for indentation purposes.") | |
504af7b2 | 104 | |
94487c4e | 105 | ;; Keep the following for `kinsoku' processing. See comments in |
4ed46869 | 106 | ;; kinsoku.el. |
46bf60bc KH |
107 | (define-category ?> "Not at bol |
108 | A character which can't be placed at beginning of line.") | |
109 | (define-category ?< "Not at eol | |
110 | A character which can't be placed at end of line.") | |
4ed46869 | 111 | |
8ea6fa80 KH |
112 | ;; Base and Combining |
113 | (define-category ?. "Base | |
114 | Base characters (Unicode General Category L,N,P,S,Zs)") | |
46bf60bc | 115 | (define-category ?^ "Combining |
4eb97232 | 116 | Combining diacritic or mark (Unicode General Category M)") |
f635daa1 CY |
117 | |
118 | ;; bidi types | |
119 | (define-category ?R "Right-to-left (strong) | |
120 | Characters with \"strong\" right-to-left directionality, i.e. | |
121 | with R, AL, RLE, or RLO Unicode bidi character type.") | |
122 | ||
123 | (define-category ?L "Left-to-right (strong) | |
124 | Characters with \"strong\" left-to-right directionality, i.e. | |
125 | with L, LRE, or LRO Unicode bidi character type.") | |
126 | ||
4ed46869 KH |
127 | \f |
128 | ;;; Setting syntax and category. | |
129 | ||
130 | ;; ASCII | |
131 | ||
e2cc40b7 KH |
;; Every character from code 32 through code 127 belongs to both
;; category `a' (ASCII) and category `l' (Latin).
(dolist (category '(?a ?l))
  (modify-category-entry '(32 . 127) category))
4ed46869 | 135 | |
c94ae9eb DL |
136 | ;; Deal with the CJK charsets first. Since the syntax of blocks is |
137 | ;; defined per charset, and the charsets may contain e.g. Latin | |
138 | ;; characters, we end up with the wrong syntax definitions if we're | |
139 | ;; not careful. | |
4ed46869 | 140 | |
66bff5ed | 141 | ;; Chinese characters (Unicode) |
a5bb49e1 KH |
;; Each entry is (RANGE CATEGORY...): every listed category is added
;; to every character in RANGE.  `|' marks line-breakable characters,
;; `C' is "2-byte han" and `c' is "Chinese".
(dolist (entry '(((#x2E80 . #x312F) ?\|)
                 ((#x3190 . #x33FF) ?\|)
                 ((#x3400 . #x4DBF) ?C)
                 ((#x4E00 . #x9FAF) ?C)
                 ((#x3400 . #x9FAF) ?c ?\|)
                 ((#xF900 . #xFAFF) ?C ?c ?\|)
                 ((#x20000 . #x2FFFF) ?\| ?C ?c)))
  (dolist (category (cdr entry))
    (modify-category-entry (car entry) category)))
8e4cd685 | 154 | |
4ed46869 KH |
155 | |
156 | ;; Chinese character set (GB2312) | |
157 | ||
66bff5ed KH |
;; Three GB2312 code ranges get symbol syntax.
(dolist (span '((#x2121 . #x217E) (#x2221 . #x227E) (#x2921 . #x297E)))
  (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_"
                     (car span) (cdr span)))

;; Every GB2312 character is in category `c' (Chinese).
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)

;; Per-range 2-byte categories; each entry is (CATEGORY FROM TO)
;; with FROM and TO being GB2312 code points.
(dolist (spec '((?A #x2330 #x2339)      ; 2-byte alnum
                (?A #x2341 #x235A)
                (?A #x2361 #x237A)
                (?H #x2421 #x247E)      ; 2-byte Hiragana
                (?K #x2521 #x257E)      ; 2-byte Katakana
                (?G #x2621 #x267E)      ; 2-byte Greek
                (?Y #x2721 #x277E)      ; 2-byte Cyrillic
                (?C #x3021 #x7E7E)))    ; 2-byte han
  (map-charset-chars #'modify-category-entry 'chinese-gb2312
                     (nth 0 spec) (nth 1 spec) (nth 2 spec)))
4ed46869 KH |
171 | |
172 | ;; Chinese character set (BIG5) | |
173 | ||
;; Every BIG5 character is in category `c' (Chinese); three code
;; ranges are additionally in category `C' (2-byte han).
(map-charset-chars #'modify-category-entry 'big5 ?c)
(dolist (span '((#xA259 . #xA261)
                (#xA440 . #xC67E)
                (#xC940 . #xF9DC)))
  (map-charset-chars #'modify-category-entry 'big5 ?C
                     (car span) (cdr span)))
4ed46869 KH |
178 | |
179 | ;; Chinese character set (CNS11643) | |
180 | ||
87a39edb DL |
;; All seven CNS11643 planes are in category `c' (Chinese).  Category
;; `C' (2-byte han) covers only code points #x4421-#x7E7E of plane 1,
;; but the whole of every other plane.
(let ((planes '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                chinese-cns11643-7)))
  (while planes
    (let ((plane (car planes)))
      (map-charset-chars #'modify-category-entry plane ?c)
      ;; With no trailing range arguments the whole charset is mapped.
      (apply #'map-charset-chars #'modify-category-entry plane ?C
             (and (eq plane 'chinese-cns11643-1)
                  '(#x4421 #x7E7E))))
    (setq planes (cdr planes))))
4ed46869 | 188 | |
8f924df7 | 189 | ;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213) |
4ed46869 | 190 | |
;; Charset-specific categories: `k' (Katakana) for JISX0201 kana and
;; `r' (Roman) for JISX0201 roman.
(dolist (pair '((katakana-jisx0201 . ?k)
                (latin-jisx0201 . ?r)))
  (map-charset-chars #'modify-category-entry (car pair) (cdr pair)))

;; All of the following charsets are in category `j' (Japanese).
(mapc (lambda (charset)
        (map-charset-chars #'modify-category-entry charset ?j))
      '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
        japanese-jisx0213-1 japanese-jisx0213-2
        cp932-2-byte))
4ed46869 | 199 | |
c4186f9c KH |
;; Fullwidth characters: lines may be broken at any of them.
(modify-category-entry '(#xff01 . #xff60) ?\|)

;; Unicode equivalents of JISX0201-kana: Katakana, Japanese, and
;; line-breakable.
(dolist (category '(?k ?j ?\|))
  (modify-category-entry '(#xff61 . #xff9f) category))
269a5dd0 DL |
208 | |
;; Katakana and Hiragana blocks.  Each entry is (CATEGORY TARGET...),
;; where a TARGET is either a single code point or a (FROM . TO) range.
(dolist (spec '(;; Katakana block
                (?K (#x3099 . #x309C) (#x30A0 . #x30FF) (#x31F0 . #x31FF))
                (?\| (#x30A0 . #x30FA) #x30FF)
                ;; Hiragana block
                (?H (#x3040 . #x309F) #x30A0 #x30FC)
                (?\| (#x3040 . #x3096) #x309F)))
  (dolist (target (cdr spec))
    (modify-category-entry target (car spec))))
222 | ||
269a5dd0 | 223 | |
4ed46869 | 224 | ;; JISX0208 |
66bff5ed KH |
225 | (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E) |
226 | (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E) | |
227 | (let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))) | |
69c2c6ea | 228 | (dolist (elt chars) |
2b89bca4 | 229 | (modify-syntax-entry elt "w"))) |
66bff5ed KH |
230 | |
231 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E) | |
232 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E) | |
233 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E) | |
234 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E) | |
235 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E) | |
236 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E) | |
66a85e76 | 237 | (let ((chars '(?仝 ?々 ?〆 ?〇))) |
4ed46869 KH |
238 | (while chars |
239 | (modify-category-entry (car chars) ?C) | |
240 | (setq chars (cdr chars)))) | |
241 | ||
242 | ;; JISX0212 | |
4ed46869 | 243 | |
66bff5ed | 244 | (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E) |
4ed46869 KH |
245 | |
246 | ;; JISX0201-Kana | |
87a39edb | 247 | |
abdaa411 | 248 | (let ((chars '(?。 ?、 ?・))) |
4ed46869 KH |
249 | (while chars |
250 | (modify-syntax-entry (car chars) ".") | |
251 | (setq chars (cdr chars)))) | |
252 | ||
e6d10035 KH |
;; Corner brackets form a matched pair: the opening bracket gets
;; open-parenthesis syntax matched with the closing bracket, and the
;; closing bracket gets close-parenthesis syntax matched with the
;; opening one.  (The closing bracket used to be given open-parenthesis
;; syntax as well, which broke paren matching and sexp motion.)
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\」 ")「")
226e4119 | 255 | |
4ed46869 KH |
256 | ;; Korean character set (KSC5601) |
257 | ||
87a39edb | 258 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h) |
66bff5ed KH |
259 | |
260 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E) | |
439f7264 DL |
261 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E) |
262 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E) | |
263 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E) | |
9ad4b491 KH |
264 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339) |
265 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A) | |
266 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A) | |
66bff5ed KH |
267 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E) |
268 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E) | |
269 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E) | |
270 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E) | |
4ed46869 | 271 | |
c94ae9eb | 272 | ;; These are in more than one charset. |
8f924df7 KH |
273 | (let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛" |
274 | "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄" | |
275 | "()[]{}")) | |
276 | open close) | |
277 | (dotimes (i (/ (length parens) 2)) | |
278 | (setq open (aref parens (* i 2)) | |
279 | close (aref parens (1+ (* i 2)))) | |
280 | (modify-syntax-entry open (format "(%c" close)) | |
281 | (modify-syntax-entry close (format ")%c" open)))) | |
d05cfa1f | 282 | |
c94ae9eb | 283 | ;; Arabic character set |
6eba8645 | 284 | |
c94ae9eb DL |
;; Category `b' (Arabic) for every character of the Arabic charsets...
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (map-charset-chars #'modify-category-entry charset ?b))
;; ...and for these Unicode ranges.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (modify-category-entry range ?b))
6eba8645 | 295 | |
c94ae9eb DL |
296 | ;; Cyrillic character set (ISO-8859-5) |
297 | ||
298 | (modify-syntax-entry ?№ ".") | |
299 | ||
300 | ;; Ethiopic character set | |
301 | ||
4c81b0f6 KH |
302 | (modify-category-entry '(#x1200 . #x1399) ?e) |
303 | (modify-category-entry '(#x2d80 . #x2dde) ?e) | |
55a3ed16 | 304 | (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨))) |
c94ae9eb DL |
305 | (while chars |
306 | (modify-syntax-entry (car chars) ".") | |
307 | (setq chars (cdr chars)))) | |
308 | (map-charset-chars #'modify-category-entry 'ethiopic ?e) | |
309 | ||
310 | ;; Hebrew character set (ISO-8859-8) | |
311 | ||
;; Hebrew marks that get punctuation syntax.
(dolist (mark '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry mark "."))
317 | ||
318 | ;; Indian character set (IS 13194 and other Emacs original Indian charsets) | |
319 | ||
320 | (modify-category-entry '(#x901 . #x970) ?i) | |
321 | (map-charset-chars #'modify-category-entry 'indian-is13194 ?i) | |
322 | (map-charset-chars #'modify-category-entry 'indian-2-column ?i) | |
d05cfa1f | 323 | |
6eba8645 KH |
324 | ;; Lao character set |
325 | ||
abdaa411 DL |
326 | (modify-category-entry '(#xe80 . #xeff) ?o) |
327 | (map-charset-chars #'modify-category-entry 'lao ?o) | |
6eba8645 | 328 | |
abdaa411 | 329 | (let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant |
e6d10035 KH |
330 | ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base |
331 | ("ັິ-ືົໍ" "w" ?2) ; vowel upper | |
332 | ("ຸູ" "w" ?3) ; vowel lower | |
8f924df7 | 333 | ("່-໋" "w" ?4) ; tone mark |
e6d10035 KH |
334 | ("ຼຽ" "w" ?9) ; semivowel lower |
335 | ("໐-໙" "w" ?6) ; digit | |
336 | ("ຯໆ" "_" ?5) ; symbol | |
6eba8645 KH |
337 | )) |
338 | elm chars len syntax category to ch i) | |
339 | (while deflist | |
340 | (setq elm (car deflist)) | |
341 | (setq chars (car elm) | |
342 | len (length chars) | |
343 | syntax (nth 1 elm) | |
344 | category (nth 2 elm) | |
345 | i 0) | |
346 | (while (< i len) | |
347 | (if (= (aref chars i) ?-) | |
348 | (setq i (1+ i) | |
4a027a0d KH |
349 | to (aref chars i)) |
350 | (setq ch (aref chars i) | |
6eba8645 KH |
351 | to ch)) |
352 | (while (<= ch to) | |
269a5dd0 DL |
353 | (unless (string-equal syntax "w") |
354 | (modify-syntax-entry ch syntax)) | |
6eba8645 KH |
355 | (modify-category-entry ch category) |
356 | (setq ch (1+ ch))) | |
4a027a0d | 357 | (setq i (1+ i))) |
6eba8645 KH |
358 | (setq deflist (cdr deflist)))) |
359 | ||
4ed46869 KH |
360 | ;; Thai character set (TIS620) |
361 | ||
abdaa411 DL |
362 | (modify-category-entry '(#xe00 . #xe7f) ?t) |
363 | (map-charset-chars #'modify-category-entry 'thai-tis620 ?t) | |
4ed46869 KH |
364 | |
365 | (let ((deflist '(;; chars syntax category | |
e6d10035 KH |
366 | ("ก-รลว-ฮ" "w" ?0) ; consonant |
367 | ("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base | |
368 | ("ัิ-ื็๎" "w" ?2) ; vowel upper | |
369 | ("ุ-ฺ" "w" ?3) ; vowel lower | |
8f924df7 | 370 | ("่-ํ" "w" ?4) ; tone mark |
e6d10035 KH |
371 | ("๐-๙" "w" ?6) ; digit |
372 | ("ฯๆ฿๏๚๛" "_" ?5) ; symbol | |
4ed46869 KH |
373 | )) |
374 | elm chars len syntax category to ch i) | |
9395eb7c KH |
375 | (while deflist |
376 | (setq elm (car deflist)) | |
377 | (setq chars (car elm) | |
378 | len (length chars) | |
379 | syntax (nth 1 elm) | |
380 | category (nth 2 elm) | |
381 | i 0) | |
382 | (while (< i len) | |
383 | (if (= (aref chars i) ?-) | |
384 | (setq i (1+ i) | |
4a027a0d KH |
385 | to (aref chars i)) |
386 | (setq ch (aref chars i) | |
9395eb7c KH |
387 | to ch)) |
388 | (while (<= ch to) | |
269a5dd0 DL |
389 | (unless (string-equal syntax "w") |
390 | (modify-syntax-entry ch syntax)) | |
9395eb7c KH |
391 | (modify-category-entry ch category) |
392 | (setq ch (1+ ch))) | |
4a027a0d | 393 | (setq i (1+ i))) |
9395eb7c KH |
394 | (setq deflist (cdr deflist)))) |
395 | ||
396 | ;; Tibetan character set | |
397 | ||
abdaa411 DL |
398 | (modify-category-entry '(#xf00 . #xfff) ?q) |
399 | (map-charset-chars #'modify-category-entry 'tibetan ?q) | |
400 | (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q) | |
9395eb7c KH |
401 | |
402 | (let ((deflist '(;; chars syntax category | |
725d7c92 | 403 | ("ཀ-ཀྵཪ" "w" ?0) ; consonant |
55a3ed16 | 404 | ("ྐ-ྐྵྺྻྼ" "w" ?0) ; |
725d7c92 DL |
405 | ("ིེཻོཽྀ" "w" ?2) ; upper vowel |
406 | ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier | |
53964682 | 407 | ("྄ཱུ༙༵༷" "w" ?3) ; lower vowel/modifier |
8f924df7 | 408 | ("" "w" ?3) ; invisible vowel a |
725d7c92 DL |
409 | ("༠-༩༪-༳" "w" ?6) ; digit |
410 | ("་།-༒༔ཿ" "." ?|) ; line-break char | |
411 | ("་།༏༐༑༔ཿ" "." ?|) ; | |
412 | ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition | |
413 | ("་།༏༐༑༔ཿ" "." ?>) ; | |
414 | ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition | |
415 | ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others | |
9395eb7c KH |
416 | )) |
417 | elm chars len syntax category to ch i) | |
4ed46869 KH |
418 | (while deflist |
419 | (setq elm (car deflist)) | |
420 | (setq chars (car elm) | |
421 | len (length chars) | |
422 | syntax (nth 1 elm) | |
423 | category (nth 2 elm) | |
424 | i 0) | |
425 | (while (< i len) | |
426 | (if (= (aref chars i) ?-) | |
427 | (setq i (1+ i) | |
4a027a0d KH |
428 | to (aref chars i)) |
429 | (setq ch (aref chars i) | |
4ed46869 KH |
430 | to ch)) |
431 | (while (<= ch to) | |
269a5dd0 DL |
432 | (unless (string-equal syntax "w") |
433 | (modify-syntax-entry ch syntax)) | |
4ed46869 KH |
434 | (modify-category-entry ch category) |
435 | (setq ch (1+ ch))) | |
4a027a0d | 436 | (setq i (1+ i))) |
4ed46869 KH |
437 | (setq deflist (cdr deflist)))) |
438 | ||
439 | ;; Vietnamese character set | |
440 | ||
abdaa411 DL |
441 | ;; To make a word with Latin characters |
442 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l) | |
443 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v) | |
444 | ||
445 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l) | |
446 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v) | |
4ed46869 | 447 | |
e5dd1155 KH |
;; For each code 32..127, pair the character decoded from
;; `vietnamese-viscii-upper' with the one decoded from
;; `vietnamese-viscii-lower' in the standard case table, and put the
;; `ucs' encodings of both (when there is one) in category `v'.
(let ((case-table (standard-case-table)))
  (dotimes (offset 96)
    (let* ((code (+ offset 32))
           (upper (decode-char 'vietnamese-viscii-upper code))
           (lower (decode-char 'vietnamese-viscii-lower code))
           (upper-ucs (encode-char upper 'ucs))
           (lower-ucs (encode-char lower 'ucs)))
      (set-case-syntax-pair upper lower case-table)
      (when upper-ucs
        (modify-category-entry upper-ucs ?v))
      (when lower-ucs
        (modify-category-entry lower-ucs ?v)))))
460 | ||
d807d0c7 KH |
461 | ;; Tai Viet |
462 | (let ((deflist '(;; chars syntax category | |
da6062e6 | 463 | ((?ꪀ. ?ꪯ) "w" ?0) ; consonant |
d807d0c7 KH |
464 | ("ꪱꪵꪶ" "w" ?1) ; vowel base |
465 | ((?ꪹ . ?ꪽ) "w" ?1) ; vowel base | |
466 | ("ꪰꪲꪳꪷꪸꪾ" "w" ?2) ; vowel upper | |
467 | ("ꪴ" "w" ?3) ; vowel lower | |
468 | ("ꫀꫂ" "w" ?1) ; non-combining tone-mark | |
469 | ("꪿꫁" "w" ?4) ; combining tone-mark | |
470 | ((?ꫛ . ?꫟) "_" ?5) ; symbol | |
471 | ))) | |
472 | (dolist (elm deflist) | |
473 | (let ((chars (car elm)) | |
474 | (syntax (nth 1 elm)) | |
475 | (category (nth 2 elm))) | |
476 | (if (consp chars) | |
477 | (progn | |
478 | (modify-syntax-entry chars syntax) | |
479 | (modify-category-entry chars category)) | |
480 | (mapc #'(lambda (x) | |
481 | (modify-syntax-entry x syntax) | |
482 | (modify-category-entry x category)) | |
483 | chars))))) | |
c94ae9eb | 484 | |
f635daa1 CY |
485 | ;; Bidi categories |
486 | ||
20372d0c GM |
;; Derive the `R' and `L' categories from the Unicode bidi-class
;; property.  When bootstrapping without the generated uni-*.el files
;; the property table is not defined, in which case nothing is done.
(let ((bidi-classes (unicode-property-table-internal 'bidi-class)))
  (if bidi-classes
      (map-char-table
       (lambda (key class)
         (let ((category (cond ((memq class '(R AL RLO RLE)) ?R)
                               ((memq class '(L LRE LRO)) ?L))))
           ;; Characters of any other bidi class get neither category.
           (if category
               (modify-category-entry key category))))
       bidi-classes)))
f635daa1 | 497 | |
b7cf27ed EZ |
498 | ;; Load uni-mirrored.el if available, so that it gets dumped into |
499 | ;; Emacs. This makes it possible to start Emacs with force-load-messages in |
500 | ;; ~/.emacs, and avoid infinite recursion in bidi_initialize, which | |
501 | ;; needs to load uni-mirrored.el in order to display the "Loading" | |
502 | ;; messages. | |
503 | (unicode-property-table-internal 'mirroring) | |
bbab1c4f | 504 | |
c94ae9eb DL |
505 | ;; Latin |
506 | ||
507 | (modify-category-entry '(#x80 . #x024F) ?l) | |
d05cfa1f | 508 | |
85ef8ece KH |
509 | (let ((tbl (standard-case-table)) c) |
510 | ||
4fb82d62 DL |
511 | ;; Latin-1 |
512 | ||
513 | ;; Fixme: Some of the non-word syntaxes here perhaps should be | |
514 | ;; reviewed. (Note that the following all implicitly have word | |
515 | ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.) There should be a well-defined way of | |
516 | ;; relating Unicode categories to Emacs syntax codes. | |
db92e81e KH |
517 | |
;; Syntax of the non-letter Latin-1 characters, set in the case table
;; TBL of the enclosing form.  Characters are given as code points
;; rather than literals: NO-BREAK SPACE and SOFT HYPHEN are invisible
;; in source text and are easily turned into (or mistaken for) a plain
;; space, which would silently change the syntax of SPACE instead.
;;
;; NBSP isn't semantically interchangeable with other whitespace chars,
;; so it's more like punctuation.
;;
;; French wants
;; (set-case-syntax-delims ?« ?» tbl)
;; And German wants
;; (set-case-syntax-delims ?» ?« tbl)
;; So let's stay neutral and let users set these up if/when they want to;
;; both guillemets simply get punctuation syntax here.
(dolist (entry '((#xA0 . ".")           ; NO-BREAK SPACE
                 (#xA1 . ".")           ; INVERTED EXCLAMATION MARK
                 (#xA6 . "_")           ; BROKEN BAR
                 (#xA7 . ".")           ; SECTION SIGN
                 (#xA9 . "_")           ; COPYRIGHT SIGN
                 (#xAB . ".")           ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
                 (#xBB . ".")           ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
                 (#xAC . "_")           ; NOT SIGN
                 (#xAD . "_")           ; SOFT HYPHEN
                 (#xAE . "_")           ; REGISTERED SIGN
                 (#xB0 . "_")           ; DEGREE SIGN
                 (#xB1 . "_")           ; PLUS-MINUS SIGN
                 (#xB5 . "_")           ; MICRO SIGN
                 (#xB7 . "_")           ; MIDDLE DOT
                 (#xBC . "_")           ; VULGAR FRACTION ONE QUARTER
                 (#xBD . "_")           ; VULGAR FRACTION ONE HALF
                 (#xBE . "_")           ; VULGAR FRACTION THREE QUARTERS
                 (#xBF . ".")))         ; INVERTED QUESTION MARK
  (set-case-syntax (car entry) (cdr entry) tbl))
543 | (let ((c 192)) | |
544 | (while (<= c 222) | |
545 | (set-case-syntax-pair c (+ c 32) tbl) | |
546 | (setq c (1+ c)))) | |
547 | (set-case-syntax ?× "_" tbl) | |
548 | (set-case-syntax ?ß "w" tbl) | |
549 | (set-case-syntax ?÷ "_" tbl) | |
550 | ;; See below for ÿ. | |
85ef8ece | 551 | |
85ef8ece KH |
552 | ;; Latin Extended-A, Latin Extended-B |
553 | (setq c #x0100) | |
e5e381c8 KH |
554 | (while (<= c #x02B8) |
555 | (modify-category-entry c ?l) | |
d05cfa1f | 556 | (setq c (1+ c))) |
2bb915b8 | 557 | |
e5e381c8 KH |
;; Ranges of Latin Extended-A/B in which upper- and lower-case letters
;; alternate: within each (FROM . TO) range, FROM, FROM+2, ... are
;; paired with the code point that immediately follows them.
(dolist (range '((#x0100 . #x012F)
                 (#x0132 . #x0137)
                 (#x0139 . #x0148)
                 (#x014a . #x0177)
                 (#x0179 . #x017E)
                 (#x0182 . #x0185)
                 (#x0187 . #x0188)
                 (#x018B . #x018C)
                 (#x0191 . #x0192)
                 (#x0198 . #x0199)
                 (#x01A0 . #x01A5)
                 (#x01A7 . #x01A8)
                 (#x01AC . #x01AD)
                 (#x01AF . #x01B0)
                 (#x01B3 . #x01B6)
                 (#x01B8 . #x01B9)
                 (#x01BC . #x01BD)
                 (#x01CD . #x01DC)
                 (#x01DE . #x01EF)
                 (#x01F4 . #x01F5)
                 (#x01F8 . #x021F)
                 (#x0222 . #x0233)
                 (#x023B . #x023C)
                 (#x0241 . #x0242)
                 (#x0246 . #x024F)))
  ;; Visit every second code point that is strictly below the end of
  ;; the range.
  (dolist (upper (number-sequence (car range) (1- (cdr range)) 2))
    (set-case-syntax-pair upper (1+ upper) tbl)))
2bb915b8 | 588 | |
d0203d61 | 589 | (set-case-syntax-pair ?Ÿ ?ÿ tbl) |
796f8b2f | 590 | |
2bb915b8 KH |
591 | ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I |
592 | ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so | |
593 | ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN | |
594 | ;; SMALL LETTER I. | |
595 | ||
596 | ;; We used to set up half of those correspondences unconditionally, |
597 | ;; but that makes searches slow. So now we don't set up either half | |
598 | ;; of these correspondences by default. | |
599 | ||
600 | ;; (set-downcase-syntax ?İ ?i tbl) | |
601 | ;; (set-upcase-syntax ?I ?ı tbl) | |
602 | ||
0d93216c AS |
603 | (set-case-syntax-pair ?Ɓ ?ɓ tbl) |
604 | (set-case-syntax-pair ?Ɔ ?ɔ tbl) | |
d0203d61 AS |
605 | (set-case-syntax-pair ?Ɖ ?ɖ tbl) |
606 | (set-case-syntax-pair ?Ɗ ?ɗ tbl) | |
0d93216c AS |
607 | (set-case-syntax-pair ?Ǝ ?ǝ tbl) |
608 | (set-case-syntax-pair ?Ə ?ə tbl) | |
609 | (set-case-syntax-pair ?Ɛ ?ɛ tbl) | |
610 | (set-case-syntax-pair ?Ɠ ?ɠ tbl) | |
611 | (set-case-syntax-pair ?Ɣ ?ɣ tbl) | |
612 | (set-case-syntax-pair ?Ɩ ?ɩ tbl) | |
613 | (set-case-syntax-pair ?Ɨ ?ɨ tbl) | |
614 | (set-case-syntax-pair ?Ɯ ?ɯ tbl) | |
615 | (set-case-syntax-pair ?Ɲ ?ɲ tbl) | |
616 | (set-case-syntax-pair ?Ɵ ?ɵ tbl) | |
617 | (set-case-syntax-pair ?Ʀ ?ʀ tbl) | |
618 | (set-case-syntax-pair ?Ʃ ?ʃ tbl) | |
619 | (set-case-syntax-pair ?Ʈ ?ʈ tbl) | |
620 | (set-case-syntax-pair ?Ʊ ?ʊ tbl) | |
621 | (set-case-syntax-pair ?Ʋ ?ʋ tbl) | |
622 | (set-case-syntax-pair ?Ʒ ?ʒ tbl) | |
e6d10035 KH |
623 | (set-case-syntax-pair ?DŽ ?dž tbl) |
624 | (set-case-syntax-pair ?Dž ?dž tbl) | |
625 | (set-case-syntax-pair ?LJ ?lj tbl) | |
626 | (set-case-syntax-pair ?Lj ?lj tbl) | |
627 | (set-case-syntax-pair ?NJ ?nj tbl) | |
628 | (set-case-syntax-pair ?Nj ?nj tbl) | |
e5e381c8 | 629 | |
269a5dd0 | 630 | ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON |
e6d10035 KH |
631 | (set-case-syntax-pair ?DZ ?dz tbl) |
632 | (set-case-syntax-pair ?Dz ?dz tbl) | |
e6d10035 KH |
633 | (set-case-syntax-pair ?Ƕ ?ƕ tbl) |
634 | (set-case-syntax-pair ?Ƿ ?ƿ tbl) | |
cb80bcd1 EZ |
635 | (set-case-syntax-pair ?Ⱥ ?ⱥ tbl) |
636 | (set-case-syntax-pair ?Ƚ ?ƚ tbl) | |
637 | (set-case-syntax-pair ?Ⱦ ?ⱦ tbl) | |
638 | (set-case-syntax-pair ?Ƀ ?ƀ tbl) | |
639 | (set-case-syntax-pair ?Ʉ ?ʉ tbl) | |
640 | (set-case-syntax-pair ?Ʌ ?ʌ tbl) | |
269a5dd0 | 641 | |
85ef8ece | 642 | ;; Latin Extended Additional |
abdaa411 | 643 | (modify-category-entry '(#x1e00 . #x1ef9) ?l) |
85ef8ece | 644 | (setq c #x1e00) |
d05cfa1f | 645 | (while (<= c #x1ef9) |
d05cfa1f KH |
646 | (and (zerop (% c 2)) |
647 | (or (<= c #x1e94) (>= c #x1ea0)) | |
abdaa411 | 648 | (set-case-syntax-pair c (1+ c) tbl)) |
d05cfa1f KH |
649 | (setq c (1+ c))) |
650 | ||
85ef8ece | 651 | ;; Greek |
abdaa411 | 652 | (modify-category-entry '(#x0370 . #x03ff) ?g) |
85ef8ece | 653 | (setq c #x0370) |
d05cfa1f | 654 | (while (<= c #x03ff) |
d05cfa1f KH |
655 | (if (or (and (>= c #x0391) (<= c #x03a1)) |
656 | (and (>= c #x03a3) (<= c #x03ab))) | |
abdaa411 | 657 | (set-case-syntax-pair c (+ c 32) tbl)) |
d05cfa1f KH |
658 | (and (>= c #x03da) |
659 | (<= c #x03ee) | |
660 | (zerop (% c 2)) | |
abdaa411 | 661 | (set-case-syntax-pair c (1+ c) tbl)) |
d05cfa1f | 662 | (setq c (1+ c))) |
e6d10035 KH |
663 | (set-case-syntax-pair ?Ά ?ά tbl) |
664 | (set-case-syntax-pair ?Έ ?έ tbl) | |
665 | (set-case-syntax-pair ?Ή ?ή tbl) | |
666 | (set-case-syntax-pair ?Ί ?ί tbl) | |
667 | (set-case-syntax-pair ?Ό ?ό tbl) | |
668 | (set-case-syntax-pair ?Ύ ?ύ tbl) | |
669 | (set-case-syntax-pair ?Ώ ?ώ tbl) | |
d05cfa1f | 670 | |
269a5dd0 DL |
671 | ;; Armenian |
672 | (setq c #x531) | |
673 | (while (<= c #x556) | |
abdaa411 | 674 | (set-case-syntax-pair c (+ c #x30) tbl) |
269a5dd0 DL |
675 | (setq c (1+ c))) |
676 | ||
;; Greek Extended
(modify-category-entry '(#x1f00 . #x1fff) ?g)
;; In the columns up to #x1fa7, the lower-case letter at C (low nibble
;; 0-7) pairs with the upper-case letter at C + 8.  Skipped:
;;  - the #x1f70 column, whose letters have no capital at C + 8;
;;  - #x1f16, #x1f17, #x1f46 and #x1f47, which are unassigned, as are
;;    the code points 8 above them;
;;  - #x1f50, #x1f52, #x1f54 and #x1f56, which are lower-case letters
;;    whose C + 8 slot is unassigned.
;; #x1f57 is not skipped: it is the lower-case partner of #x1f5f.
(setq c #x1f00)
(while (<= c #x1fff)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f16 #x1f17 #x1f46 #x1f47
                      #x1f50 #x1f52 #x1f54 #x1f56)))
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair (+ c 8) c tbl))
  (setq c (1+ c)))
e6d10035 KH |
;; Greek Extended case pairs that do not follow the "C + 8" layout.
;; Each entry is (UPPER . LOWER).  Code points are spelled out because
;; the letters with oxia (e.g. U+1FBB / U+1F71) are canonically
;; equivalent to the Greek-block letters with tonos (U+0386 / U+03AC):
;; any tool that normalizes the source text silently turns the literal
;; into the wrong character.
(dolist (pair '((#x1FB8 . #x1FB0)       ; ALPHA WITH VRACHY
                (#x1FB9 . #x1FB1)       ; ALPHA WITH MACRON
                (#x1FBA . #x1F70)       ; ALPHA WITH VARIA
                (#x1FBB . #x1F71)       ; ALPHA WITH OXIA
                (#x1FBC . #x1FB3)       ; ALPHA WITH PROSGEGRAMMENI / YPOGEGRAMMENI
                (#x1FC8 . #x1F72)       ; EPSILON WITH VARIA
                (#x1FC9 . #x1F73)       ; EPSILON WITH OXIA
                (#x1FCA . #x1F74)       ; ETA WITH VARIA
                (#x1FCB . #x1F75)       ; ETA WITH OXIA
                (#x1FCC . #x1FC3)       ; ETA WITH PROSGEGRAMMENI / YPOGEGRAMMENI
                (#x1FD8 . #x1FD0)       ; IOTA WITH VRACHY
                (#x1FD9 . #x1FD1)       ; IOTA WITH MACRON
                (#x1FDA . #x1F76)       ; IOTA WITH VARIA
                (#x1FDB . #x1F77)       ; IOTA WITH OXIA
                (#x1FE8 . #x1FE0)       ; UPSILON WITH VRACHY
                (#x1FE9 . #x1FE1)       ; UPSILON WITH MACRON
                (#x1FEA . #x1F7A)       ; UPSILON WITH VARIA
                (#x1FEB . #x1F7B)       ; UPSILON WITH OXIA
                (#x1FEC . #x1FE5)       ; RHO WITH DASIA
                (#x1FF8 . #x1F78)       ; OMICRON WITH VARIA
                (#x1FF9 . #x1F79)       ; OMICRON WITH OXIA
                (#x1FFA . #x1F7C)       ; OMEGA WITH VARIA
                (#x1FFB . #x1F7D)       ; OMEGA WITH OXIA
                (#x1FFC . #x1FF3)))     ; OMEGA WITH PROSGEGRAMMENI / YPOGEGRAMMENI
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
d05cfa1f | 712 | |
85ef8ece | 713 | ;; cyrillic |
abdaa411 | 714 | (modify-category-entry '(#x0400 . #x04FF) ?y) |
85ef8ece | 715 | (setq c #x0400) |
d05cfa1f | 716 | (while (<= c #x04ff) |
d05cfa1f KH |
717 | (and (>= c #x0400) |
718 | (<= c #x040f) | |
abdaa411 | 719 | (set-case-syntax-pair c (+ c 80) tbl)) |
d05cfa1f KH |
720 | (and (>= c #x0410) |
721 | (<= c #x042f) | |
abdaa411 | 722 | (set-case-syntax-pair c (+ c 32) tbl)) |
d05cfa1f KH |
723 | (and (zerop (% c 2)) |
724 | (or (and (>= c #x0460) (<= c #x0480)) | |
725 | (and (>= c #x048c) (<= c #x04be)) | |
726 | (and (>= c #x04d0) (<= c #x04f4))) | |
8f924df7 | 727 | (set-case-syntax-pair c (1+ c) tbl)) |
d05cfa1f | 728 | (setq c (1+ c))) |
e6d10035 KH |
729 | (set-case-syntax-pair ?Ӂ ?ӂ tbl) |
730 | (set-case-syntax-pair ?Ӄ ?ӄ tbl) | |
731 | (set-case-syntax-pair ?Ӈ ?ӈ tbl) | |
732 | (set-case-syntax-pair ?Ӌ ?ӌ tbl) | |
733 | (set-case-syntax-pair ?Ӹ ?ӹ tbl) | |
d05cfa1f | 734 | |
85ef8ece KH |
735 | ;; general punctuation |
736 | (setq c #x2000) | |
d05cfa1f KH |
737 | (while (<= c #x200b) |
738 | (set-case-syntax c " " tbl) | |
739 | (setq c (1+ c))) | |
b427c97e DL |
740 | (while (<= c #x200F) |
741 | (set-case-syntax c "." tbl) | |
742 | (setq c (1+ c))) | |
743 | ;; Fixme: These aren't all right: | |
6b61353c KH |
744 | (setq c #x2010) |
745 | (while (<= c #x2016) | |
746 | (set-case-syntax c "_" tbl) | |
747 | (setq c (1+ c))) | |
748 | ;; Punctuation syntax for quotation marks (like `) | |
749 | (while (<= c #x201f) | |
750 | (set-case-syntax c "." tbl) | |
751 | (setq c (1+ c))) | |
752 | ;; Fixme: These aren't all right: | |
d05cfa1f KH |
753 | (while (<= c #x2027) |
754 | (set-case-syntax c "_" tbl) | |
755 | (setq c (1+ c))) | |
b427c97e DL |
756 | (while (<= c #x206F) |
757 | (set-case-syntax c "." tbl) | |
758 | (setq c (1+ c))) | |
d05cfa1f | 759 | |
269a5dd0 DL |
760 | ;; Roman numerals |
761 | (setq c #x2160) | |
762 | (while (<= c #x216f) | |
abdaa411 | 763 | (set-case-syntax-pair c (+ c #x10) tbl) |
269a5dd0 DL |
764 | (setq c (1+ c))) |
765 | ||
4fb82d62 DL |
766 | ;; Fixme: The following blocks might be better as symbol rather than |
767 | ;; punctuation. | |
b427c97e DL |
768 | ;; Arrows |
769 | (setq c #x2190) | |
6ca54a3a DL |
770 | (while (<= c #x21FF) |
771 | (set-case-syntax c "." tbl) | |
b427c97e DL |
772 | (setq c (1+ c))) |
773 | ;; Mathematical Operators | |
774 | (while (<= c #x22FF) | |
6ca54a3a | 775 | (set-case-syntax c "." tbl) |
b427c97e DL |
776 | (setq c (1+ c))) |
777 | ;; Miscellaneous Technical | |
778 | (while (<= c #x23FF) | |
6ca54a3a | 779 | (set-case-syntax c "." tbl) |
b427c97e DL |
780 | (setq c (1+ c))) |
781 | ;; Control Pictures | |
782 | (while (<= c #x243F) | |
6ca54a3a | 783 | (set-case-syntax c "_" tbl) |
269a5dd0 DL |
784 | (setq c (1+ c))) |
785 | ||
786 | ;; Circled Latin | |
787 | (setq c #x24b6) | |
788 | (while (<= c #x24cf) | |
abdaa411 DL |
789 | (set-case-syntax-pair c (+ c 26) tbl) |
790 | (modify-category-entry c ?l) | |
791 | (modify-category-entry (+ c 26) ?l) | |
269a5dd0 DL |
792 | (setq c (1+ c))) |
793 | ||
ac387dd1 EZ |
794 | ;; Coptic |
795 | (let ((pair-ranges '((#x2C80 . #x2CE2) | |
796 | (#x2CEB . #x2CF2)))) | |
797 | (dolist (elt pair-ranges) | |
798 | (let ((from (car elt)) (to (cdr elt))) | |
799 | (while (< from to) | |
800 | (set-case-syntax-pair from (1+ from) tbl) | |
ac387dd1 | 801 | (setq from (+ from 2)))))) |
204db02a EZ |
802 | ;; There's no Coptic category. However, Coptic letters that are |
803 | ;; part of the Greek block above get the Greek category, and those | |
804 | ;; in this block are derived from Greek letters, so let's be | |
805 | ;; consistent about their category. | |
806 | (modify-category-entry '(#x2C80 . #x2CFF) ?g) | |
ac387dd1 | 807 | |
269a5dd0 DL |
808 | ;; Fullwidth Latin |
809 | (setq c #xff21) | |
810 | (while (<= c #xff3a) | |
abdaa411 DL |
811 | (set-case-syntax-pair c (+ c #x20) tbl) |
812 | (modify-category-entry c ?l) | |
813 | (modify-category-entry (+ c #x20) ?l) | |
269a5dd0 DL |
814 | (setq c (1+ c))) |
815 | ||
269a5dd0 | 816 | ;; Combining diacritics |
abdaa411 | 817 | (modify-category-entry '(#x300 . #x362) ?^) |
269a5dd0 | 818 | ;; Combining marks |
0ca754d0 | 819 | (modify-category-entry '(#x20d0 . #x20ff) ?^) |
269a5dd0 DL |
820 | |
821 | ;; Fixme: syntax for symbols &c | |
822 | ) | |
6b61353c KH |
823 | |
;; Give paired bracket characters open/close parenthesis syntax, each
;; one matching its partner.
;;
;; The pairs are written as code points rather than as literal
;; characters.  Literal spellings are fragile: once the file passes
;; through Unicode or width normalization, U+2329/U+232A collapse onto
;; U+3008/U+3009 and the fullwidth/halfwidth forms collapse onto ASCII
;; or onto other entries, so the intended characters silently lose
;; their parenthesis syntax.
(dolist (pair '(;; General Punctuation, super/subscripts
                (#x2045 . #x2046) (#x207D . #x207E) (#x208D . #x208E)
                ;; Miscellaneous Technical
                (#x2329 . #x232A) (#x23B4 . #x23B5)
                ;; Dingbats
                (#x2768 . #x2769) (#x276A . #x276B) (#x276C . #x276D)
                (#x2770 . #x2771) (#x2772 . #x2773) (#x2774 . #x2775)
                ;; Miscellaneous Mathematical Symbols-A
                (#x27E6 . #x27E7) (#x27E8 . #x27E9) (#x27EA . #x27EB)
                ;; Miscellaneous Mathematical Symbols-B
                (#x2983 . #x2984) (#x2985 . #x2986) (#x2987 . #x2988)
                (#x2989 . #x298A) (#x298B . #x298C) (#x298D . #x298E)
                (#x298F . #x2990) (#x2991 . #x2992) (#x2993 . #x2994)
                (#x2995 . #x2996) (#x2997 . #x2998) (#x29FC . #x29FD)
                ;; CJK Symbols and Punctuation
                (#x3008 . #x3009) (#x300A . #x300B) (#x300C . #x300D)
                (#x300E . #x300F) (#x3010 . #x3011) (#x3014 . #x3015)
                (#x3016 . #x3017) (#x3018 . #x3019) (#x301A . #x301B)
                ;; Arabic ornate parentheses
                (#xFD3E . #xFD3F)
                ;; CJK Compatibility Forms (vertical presentation)
                (#xFE35 . #xFE36) (#xFE37 . #xFE38) (#xFE39 . #xFE3A)
                (#xFE3B . #xFE3C) (#xFE3D . #xFE3E) (#xFE3F . #xFE40)
                (#xFE41 . #xFE42) (#xFE43 . #xFE44)
                ;; Small Form Variants
                (#xFE59 . #xFE5A) (#xFE5B . #xFE5C) (#xFE5D . #xFE5E)
                ;; Halfwidth and Fullwidth Forms
                (#xFF08 . #xFF09) (#xFF3B . #xFF3D) (#xFF5B . #xFF5D)
                (#xFF5F . #xFF60) (#xFF62 . #xFF63)))
  (let ((opener (car pair))
        (closer (cdr pair)))
    ;; The syntax descriptor is the class character followed by the
    ;; matching character.
    (modify-syntax-entry opener (string ?\( closer))
    (modify-syntax-entry closer (string ?\) opener))))
881 | ||
4ed46869 | 882 | \f |
;; For each character set, record the coding system best suited to
;; encoding it in the charset's `preferred-coding-system' property.
777cfce6 | 885 | |
abdaa411 | 886 | ;; Fixme: should this be junked? |
777cfce6 KH |
;; Each entry is (CHARSET . CODING-SYSTEM); CODING-SYSTEM is stored in
;; CHARSET's `preferred-coding-system' property.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . chinese-iso-8bit)
                 (chinese-gbk . chinese-gbk)
                 (gb18030-2-byte . chinese-gb18030)
                 (gb18030-4-byte-bmp . chinese-gb18030)
                 (gb18030-4-byte-smp . chinese-gb18030)
                 (gb18030-4-byte-ext-1 . chinese-gb18030)
                 (gb18030-4-byte-ext-2 . chinese-gb18030)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
df0415c5 KH |
938 | |
939 | \f | |
98a663f1 | 940 | ;; Setup auto-fill-chars for charsets that should invoke auto-filling. |
7760ba82 | 941 | ;; SPACE and NEWLINE are already set. |
df21429c KH |
942 | |
;; Character ranges that should trigger auto-filling, each given as
;; (FROM . TO).
(dolist (fill-range '((#x3041 . #x30FF)
                      (#x3400 . #x4DB5)
                      (#x4e00 . #x9fbb)
                      (#xF900 . #xFAFF)
                      (#xFF00 . #xFF9F)
                      (#x20000 . #x2FFFF)))
  (set-char-table-range auto-fill-chars fill-range t))
949 | ||
55bd52ea | 950 | \f |
7760ba82 KH |
951 | ;;; Setting char-width-table. The default is 1. |
952 | ||
953 | ;; 0: non-spacing, enclosing combining, formatting, Hangul Jamo medial | |
954 | ;; and final characters. | |
;; Every (FROM . TO) range below is given display width 0 in
;; `char-width-table'.
(dolist (zero-width-range
         '((#x0300 . #x036F) (#x0483 . #x0489) (#x0591 . #x05BD)
           (#x05BF . #x05BF) (#x05C1 . #x05C2) (#x05C4 . #x05C5)
           (#x05C7 . #x05C7) (#x0600 . #x0603) (#x0610 . #x0615)
           (#x064B . #x065E) (#x0670 . #x0670) (#x06D6 . #x06E4)
           (#x06E7 . #x06E8) (#x06EA . #x06ED) (#x070F . #x070F)
           (#x0711 . #x0711) (#x0730 . #x074A) (#x07A6 . #x07B0)
           (#x07EB . #x07F3) (#x0901 . #x0902) (#x093C . #x093C)
           (#x0941 . #x0948) (#x094D . #x094D) (#x0951 . #x0954)
           (#x0962 . #x0963) (#x0981 . #x0981) (#x09BC . #x09BC)
           (#x09C1 . #x09C4) (#x09CD . #x09CD) (#x09E2 . #x09E3)
           (#x0A01 . #x0A02) (#x0A3C . #x0A3C) (#x0A41 . #x0A4D)
           (#x0A70 . #x0A71) (#x0A81 . #x0A82) (#x0ABC . #x0ABC)
           (#x0AC1 . #x0AC8) (#x0ACD . #x0ACD) (#x0AE2 . #x0AE3)
           (#x0B01 . #x0B01) (#x0B3C . #x0B3C) (#x0B3F . #x0B3F)
           (#x0B41 . #x0B43) (#x0B4D . #x0B56) (#x0B82 . #x0B82)
           (#x0BC0 . #x0BC0) (#x0BCD . #x0BCD) (#x0C3E . #x0C40)
           (#x0C46 . #x0C56) (#x0CBC . #x0CBC) (#x0CBF . #x0CBF)
           (#x0CC6 . #x0CC6) (#x0CCC . #x0CCD) (#x0CE2 . #x0CE3)
           (#x0D41 . #x0D43) (#x0D4D . #x0D4D) (#x0DCA . #x0DCA)
           (#x0DD2 . #x0DD6) (#x0E31 . #x0E31) (#x0E34 . #x0E3A)
           (#x0E47 . #x0E4E) (#x0EB1 . #x0EB1) (#x0EB4 . #x0EBC)
           (#x0EC8 . #x0ECD) (#x0F18 . #x0F19) (#x0F35 . #x0F35)
           (#x0F37 . #x0F37) (#x0F39 . #x0F39) (#x0F71 . #x0F7E)
           (#x0F80 . #x0F84) (#x0F86 . #x0F87) (#x0F90 . #x0FBC)
           (#x0FC6 . #x0FC6) (#x102D . #x1030) (#x1032 . #x1037)
           (#x1039 . #x1039) (#x1058 . #x1059) (#x1160 . #x11FF)
           (#x135F . #x135F) (#x1712 . #x1714) (#x1732 . #x1734)
           (#x1752 . #x1753) (#x1772 . #x1773) (#x17B4 . #x17B5)
           (#x17B7 . #x17BD) (#x17C6 . #x17C6) (#x17C9 . #x17D3)
           (#x17DD . #x17DD) (#x180B . #x180D) (#x18A9 . #x18A9)
           (#x1920 . #x1922) (#x1927 . #x1928) (#x1932 . #x1932)
           (#x1939 . #x193B) (#x1A17 . #x1A18) (#x1B00 . #x1B03)
           (#x1B34 . #x1B34) (#x1B36 . #x1B3A) (#x1B3C . #x1B3C)
           (#x1B42 . #x1B42) (#x1B6B . #x1B73) (#x1DC0 . #x1DFF)
           (#x200B . #x200F) (#x202A . #x202E) (#x2060 . #x206F)
           (#x20D0 . #x20EF) (#x302A . #x302F) (#x3099 . #x309A)
           (#xA806 . #xA806) (#xA80B . #xA80B) (#xA825 . #xA826)
           (#xFB1E . #xFB1E) (#xFE00 . #xFE0F) (#xFE20 . #xFE23)
           (#xFEFF . #xFEFF) (#xFFF9 . #xFFFB) (#x10A01 . #x10A0F)
           (#x10A38 . #x10A3F) (#x1D167 . #x1D169) (#x1D173 . #x1D182)
           (#x1D185 . #x1D18B) (#x1D1AA . #x1D1AD) (#x1D242 . #x1D244)
           (#xE0001 . #xE01EF)))
  (set-char-table-range char-width-table zero-width-range 0))
1081 | ||
1082 | ;; 2: East Asian Wide and Full-width characters. | |
;; Every (FROM . TO) range below is given display width 2 in
;; `char-width-table'.
(dolist (wide-range '((#x1100 . #x115F) (#x2329 . #x232A)
                      (#x2E80 . #x303E) (#x3040 . #xA4CF)
                      (#xAC00 . #xD7A3) (#xF900 . #xFAFF)
                      (#xFE30 . #xFE6F) (#xFF01 . #xFF60)
                      (#xFFE0 . #xFFE6) (#x20000 . #x2FFFF)
                      (#x30000 . #x3FFFF)))
  (set-char-table-range char-width-table wide-range 2))
173f18ce DL |
1096 | |
1097 | ;; Other double width | |
7760ba82 KH |
1098 | ;;(map-charset-chars |
1099 | ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
1100 | ;; 'ethiopic) | |
1101 | ;; (map-charset-chars | |
1102 | ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
1103 | ;; 'tibetan) | |
;; Characters of these charsets also occupy two columns.
(dolist (two-column-charset '(indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (range _arg)
     (set-char-table-range char-width-table range 2))
   two-column-charset))
777cfce6 | 1110 | |
dbff07a2 KH |
(defvar cjk-char-width-table-list
  '((ja_JP nil
           (japanese-jisx0208 (#x2121 . #x287E))
           (cp932-2-byte (#x8140 . #x879F)))
    (zh_CN nil
           (chinese-gb2312 (#x2121 . #x297E)))
    (zh_HK nil
           (big5-hkscs (#xA140 . #xA3FE) (#xC6A0 . #xC8FE)))
    (zh_TW nil
           (big5 (#xA140 . #xA3FE))
           (chinese-cns11643-1 (#x2121 . #x427E)))
    (ko_KR nil
           (korean-ksc5601 (#x2121 . #x2C7E))))
  "Internal use only.
Alist of locale symbols vs charsets whose characters have width 2
in a language environment corresponding to that locale.
Each element has the form:
  (LOCALE TABLE (CHARSET (FROM-CODE . TO-CODE) ...) ...)
LOCALE is a locale symbol.
TABLE is the char-table used for `char-width-table', initially nil.
CHARSET is a character set.
FROM-CODE and TO-CODE delimit a range of code points in CHARSET.")
1129 | ||
(defun use-cjk-char-width-table (locale-name)
  "Internal use only.
Set up `char-width-table' for the language environment of LOCALE-NAME.
LOCALE-NAME is a symbol that must have an entry in
`cjk-char-width-table-list'; otherwise an error is signaled.

The locale's table is built the first time it is requested and is
then cached in the TABLE slot of that entry."
  ;; Start from the root table: strip any locale-specific child that
  ;; is currently installed.
  (while (char-table-parent char-width-table)
    (setq char-width-table (char-table-parent char-width-table)))
  (let ((slot (assq locale-name cjk-char-width-table-list)))
    (unless slot
      (error "Unknown locale for CJK language environment: %s"
             locale-name))
    (when (null (nth 1 slot))
      (let* ((table (make-char-table nil))
             (mark-wide (lambda (range _arg)
                          (set-char-table-range table range 2))))
        (dolist (charset-info (nthcdr 2 slot))
          (dolist (code-range (cdr charset-info))
            (map-charset-chars mark-wide (car charset-info) nil
                               (car code-range) (cdr code-range))))
        (optimize-char-table table)
        ;; Characters not covered above fall back to the root table.
        (set-char-table-parent table char-width-table)
        (setcar (cdr slot) table)))
    (setq char-width-table (nth 1 slot))))
55a3ed16 KH |
1153 | |
(defun use-default-char-width-table ()
  "Internal use only.
Set up `char-width-table' for a non-CJK language environment.
This follows the chain of parent char-tables and leaves
`char-width-table' set to the one that has no parent."
  (let (parent)
    (while (setq parent (char-table-parent char-width-table))
      (setq char-width-table parent))))
55a3ed16 | 1159 | |
;; Optimize the standard case and syntax tables now that the code
;; above has finished filling them in.
(mapc #'optimize-char-table
      (list (standard-case-table) (standard-syntax-table)))
1162 | ||
55a3ed16 KH |
1163 | \f |
1164 | ;; Setting char-script-table. | |
1165 | ||
57939ff4 EZ |
1166 | ;; The data is compiled from Blocks.txt and Scripts.txt in the |
1167 | ;; "Unicode Character Database", simplified to lump together all the | |
1168 | ;; blocks belonging to the same language. E.g., "Basic Latin", | |
1169 | ;; "Latin-1 Supplement", "Latin Extended-A", etc. are all lumped | |
1170 | ;; together under "latin". | |
1171 | ;; | |
b427c97e DL |
1172 | ;; The Unicode blocks actually extend past some of these ranges with |
1173 | ;; undefined codepoints. | |
9ce5de1c KH |
;; Fill `char-script-table' from a list of (FROM TO SCRIPT) entries,
;; and record each distinct SCRIPT symbol, in order of first
;; appearance, in extra slot 0 of the table.
(let ((script-list nil))
  (dolist (elt
           '((#x0000 #x007F latin)
             (#x00A0 #x024F latin)
             (#x0250 #x02AF phonetic)	; IPA Extensions
             (#x02B0 #x036F latin)	; Spacing Modifiers and Diacriticals
             (#x0370 #x03E1 greek)
             (#x03E2 #x03EF coptic)
             (#x03F0 #x03F3 greek)
             (#x0400 #x052F cyrillic)
             (#x0530 #x058F armenian)
             (#x0590 #x05FF hebrew)
             (#x0600 #x06FF arabic)
             (#x0700 #x074F syriac)
             (#x0750 #x077F arabic)	; Arabic Supplement
             (#x0780 #x07BF thaana)
             (#x07C0 #x07FF nko)
             (#x0800 #x083F samaritan)
             (#x0840 #x085F mandaic)
             (#x08A0 #x08FF arabic)	; Arabic Extended-A
             (#x0900 #x097F devanagari)
             (#x0980 #x09FF bengali)
             (#x0A00 #x0A7F gurmukhi)
             (#x0A80 #x0AFF gujarati)
             (#x0B00 #x0B7F oriya)
             (#x0B80 #x0BFF tamil)
             (#x0C00 #x0C7F telugu)
             (#x0C80 #x0CFF kannada)
             (#x0D00 #x0D7F malayalam)
             (#x0D80 #x0DFF sinhala)
             (#x0E00 #x0E7F thai)
             (#x0E80 #x0EFF lao)
             (#x0F00 #x0FFF tibetan)
             (#x1000 #x109F burmese)	; Myanmar
             (#x10A0 #x10FF georgian)
             (#x1100 #x11FF hangul)
             (#x1200 #x139F ethiopic)	; Ethiopic and Ethiopic Supplement
             (#x13A0 #x13FF cherokee)
             (#x1400 #x167F canadian-aboriginal)
             (#x1680 #x169F ogham)
             (#x16A0 #x16FF runic)
             (#x1700 #x171F tagalog)
             (#x1720 #x173F hanunoo)
             (#x1740 #x175F buhid)
             (#x1760 #x177F tagbanwa)
             (#x1780 #x17FF khmer)
             (#x1800 #x18AF mongolian)
             (#x18B0 #x18FF canadian-aboriginal) ; Canadian Aboriginal Syllabics Extended
             (#x1900 #x194F limbu)
             (#x1950 #x197F tai-le)
             (#x1980 #x19DF tai-lue)	; New Tai Lue
             (#x19E0 #x19FF khmer)	; Khmer Symbols
             ;; The Buginese block is U+1A00..U+1A1F; the upper bound
             ;; used to be #x1A00, which covered a single character.
             (#x1A00 #x1A1F buginese)
             (#x1A20 #x1AAF tai-tham)
             (#x1AB0 #x1AFF latin)	; Combining Diacritical Marks Extended
             (#x1B00 #x1B7F balinese)
             (#x1B80 #x1BBF sundanese)
             (#x1BC0 #x1BFF batak)
             (#x1C00 #x1C4F lepcha)
             (#x1C50 #x1C7F ol-chiki)
             (#x1CC0 #x1CCF sundanese)
             (#x1CD0 #x1CFF vedic)
             (#x1D00 #x1DBF phonetic)	; Phonetic Extensions & Supplement
             (#x1DC0 #x1EFF latin)	; Latin Extended Additional
             (#x1F00 #x1FFF greek)	; Greek Extended
             (#x2000 #x27FF symbol)
             (#x2800 #x28FF braille)
             (#x2900 #x2BFF symbol)
             (#x2C00 #x2C5F glagolitic)
             (#x2C60 #x2C7F latin)	; Latin Extended-C
             (#x2C80 #x2CFF coptic)
             (#x2D00 #x2D2F georgian)	; Georgian Supplement
             (#x2D30 #x2D7F tifinagh)
             (#x2D80 #x2DDF ethiopic)	; Ethiopic Extended
             (#x2DE0 #x2DFF cyrillic)	; Cyrillic Extended-A
             (#x2E00 #x2E7F symbol)
             (#x2E80 #x2FDF han)
             (#x2FF0 #x2FFF ideographic-description)
             (#x3000 #x303F cjk-misc)
             (#x3040 #x30FF kana)	; Hiragana and Katakana
             (#x3100 #x312F bopomofo)
             (#x3130 #x318F hangul)	; Hangul Compatibility Jamo
             (#x3190 #x319F kanbun)
             (#x31A0 #x31BF bopomofo)	; Bopomofo Extended
             (#x31C0 #x31EF cjk-misc)	; CJK Strokes
             (#x31F0 #x31FF kana)	; Katakana Phonetic Extensions
             (#x3200 #x9FAF han)
             (#xA000 #xA4CF yi)
             (#xA4D0 #xA4FF lisu)
             (#xA500 #xA63F vai)
             (#xA640 #xA69F cyrillic)	; Cyrillic Extended-B
             (#xA6A0 #xA6FF bamum)
             (#xA700 #xA7FF latin)
             (#xA800 #xA82F syloti-nagri)
             (#xA830 #xA83F north-indic-number)
             (#xA840 #xA87F phags-pa)
             (#xA880 #xA8DF saurashtra)
             (#xA8E0 #xA8FF devanagari)	; Devanagari Extended
             (#xA900 #xA92F kayah-li)
             (#xA930 #xA95F rejang)
             (#xA960 #xA97F hangul)	; Hangul Jamo Extended
             (#xA980 #xA9DF javanese)
             (#xA9E0 #xA9FF burmese)	; Myanmar Extended-B
             (#xAA00 #xAA5F cham)
             (#xAA60 #xAA7F burmese)	; Myanmar Extended-A
             (#xAA80 #xAADF tai-viet)
             (#xAAE0 #xAAFF meetei-mayek) ; Meetei Mayek Extensions
             (#xAB00 #xAB2F ethiopic)	; Ethiopic Extended-A
             (#xAB30 #xAB6F latin)	; Latin Extended-E
             (#xABC0 #xABFF meetei-mayek)
             (#xAC00 #xD7FF hangul)
             (#xF900 #xFAFF han)
             (#xFB00 #xFB06 latin)	; Latin ligatures
             (#xFB13 #xFB17 armenian)	; Armenian ligatures
             (#xFB1D #xFB4F hebrew)	; Alphabetic Presentation Forms
             (#xFB50 #xFDFF arabic)	; Arabic Presentation Forms-A
             (#xFE20 #xFE2F latin)	; Combining Half Marks
             (#xFE30 #xFE4F han)
             (#xFE70 #xFEFF arabic)	; Arabic Presentation Forms-B
             (#xFF00 #xFF5F cjk-misc)
             (#xFF61 #xFF9F kana)
             (#xFFE0 #xFFE6 cjk-misc)
             (#x10000 #x100FF linear-b)
             (#x10100 #x1013F aegean-number)
             (#x10140 #x1018F ancient-greek-number)
             (#x10190 #x101CF ancient-symbol)
             (#x101D0 #x101FF phaistos-disc)
             (#x10280 #x1029F lycian)
             (#x102A0 #x102DF carian)
             (#x102E0 #x102FF coptic)	; Coptic Epact Numbers
             ;; NOTE(review): `olt-italic' looks like a typo for
             ;; `old-italic'.  Left unchanged because code outside this
             ;; form may refer to the symbol -- confirm, then rename
             ;; everywhere at once.
             (#x10300 #x1032F olt-italic)
             (#x10330 #x1034F gothic)
             (#x10350 #x1037F old-permic)
             (#x10380 #x1039F ugaritic)
             (#x103A0 #x103DF old-persian)
             (#x10400 #x1044F deseret)
             (#x10450 #x1047F shavian)
             (#x10480 #x104AF osmanya)
             (#x10500 #x1052F elbasan)
             (#x10530 #x1056F caucasian-albanian)
             (#x10600 #x106BF linear-a)
             (#x10800 #x1083F cypriot-syllabary)
             (#x10840 #x1085F aramaic)
             (#x10860 #x1087F palmyrene)
             (#x10880 #x108AF nabataean)
             (#x10900 #x1091F phoenician)
             (#x10920 #x1093F lydian)
             (#x10980 #x109FF meroitic)
             (#x10A00 #x10A5F kharoshthi)
             (#x10A60 #x10A7F old-south-arabian)
             (#x10A80 #x10A9F old-north-arabian)
             (#x10AC0 #x10AFF manichaean)
             (#x10B00 #x10B3F avestan)
             (#x10B40 #x10B5F inscriptional-parthian)
             (#x10B60 #x10B7F inscriptional-pahlavi)
             (#x10B80 #x10BAF psalter-pahlavi)
             (#x10C00 #x10C4F old-turkic)
             (#x10E60 #x10E7F rumi-number)
             (#x11000 #x1107F brahmi)
             (#x11080 #x110CF kaithi)
             (#x110D0 #x110FF sora-sompeng)
             (#x11100 #x1114F chakma)
             (#x11150 #x1117F mahajani)
             (#x11180 #x111DF sharada)
             (#x111E0 #x111FF sinhala-archaic-number)
             (#x11200 #x1124F khojki)
             (#x112B0 #x112FF khudawadi)
             (#x11300 #x1137F grantha)
             (#x11480 #x114DF tirhuta)
             (#x11580 #x115FF siddham)
             (#x11600 #x1165F modi)
             (#x11680 #x116CF takri)
             (#x118A0 #x118FF warang-citi)
             (#x11AC0 #x11AFF pau-cin-hau)
             (#x12000 #x123FF cuneiform)
             (#x12400 #x1247F cuneiform-numbers-and-punctuation)
             (#x13000 #x1342F egyptian)
             (#x16800 #x16A3F bamum)
             (#x16A40 #x16A6F mro)
             (#x16AD0 #x16AFF bassa-vah)
             (#x16B00 #x16B8F pahawh-hmong)
             (#x16F00 #x16F9F miao)
             (#x1B000 #x1B0FF kana)	; Kana Supplement
             (#x1BC00 #x1BCAF duployan-shorthand)
             (#x1D000 #x1D0FF byzantine-musical-symbol)
             (#x1D100 #x1D1FF musical-symbol)
             (#x1D200 #x1D24F ancient-greek-musical-notation)
             (#x1D300 #x1D35F tai-xuan-jing-symbol)
             (#x1D360 #x1D37F counting-rod-numeral)
             (#x1D400 #x1D7FF mathematical)
             (#x1E800 #x1E8DF mende-kikakui)
             (#x1EE00 #x1EEFF arabic)	; Arabic Mathematical Alphabetic Symbols
             (#x1F000 #x1F02F mahjong-tile)
             (#x1F030 #x1F09F domino-tile)
             (#x1F0A0 #x1F0FF playing-cards)
             (#x1F100 #x1F1FF symbol)	; Enclosed Alphanumeric Supplement
             (#x1F200 #x1F2FF han)	; Enclosed Ideographic Supplement
             (#x1F300 #x1F8FF symbol)
             (#x20000 #x2B81F han)
             (#x2F800 #x2FFFF han)))
    (let ((from (car elt))
          (to (nth 1 elt))
          (script (nth 2 elt)))
      (set-char-table-range char-script-table (cons from to) script)
      ;; Collect each script once; the list is built in reverse.
      (unless (memq script script-list)
        (setq script-list (cons script script-list)))))
  (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
1380 | ||
;; Mark every character of the `tibetan' charset as script `tibetan'.
(map-charset-chars
 (lambda (tibetan-range _arg)
   (set-char-table-range char-script-table tibetan-range 'tibetan))
 'tibetan)
1385 | ||
e7259832 | 1386 | \f |
59db3a5c KH |
1387 | ;;; Setting unicode-category-table. |
1388 | ||
20372d0c GM |
;; Load the general-category property table.  If it is available, give
;; category `.' to every character whose general category does not
;; start with `M' or `C'.  Then optimize the standard category table.
(when (setq unicode-category-table
            (unicode-property-table-internal 'general-category))
  (map-char-table
   (lambda (chars category)
     (when category
       (let ((major-class (aref (symbol-name category) 0)))
         ;; Skip only the M* and C* categories; `Zs' is always kept.
         (unless (and (memq major-class '(?M ?C))
                      (not (eq category 'Zs)))
           (modify-category-entry chars ?.)))))
   unicode-category-table))

(optimize-char-table (standard-category-table))
59db3a5c KH |
1400 | |
1401 | \f | |
b2cca856 KH |
1402 | ;; Display of glyphless characters. |
1403 | ||
(defvar char-acronym-table
  (make-char-table 'char-acronym-table nil)
  "Char table of acronyms for non-graphic characters.")

;; C0 controls, U+0000..U+001F.  TAB (U+0009) and LF (U+000A) get no
;; acronym.  U+001C is FILE SEPARATOR, hence "FS" (it used to be
;; misspelled "FC").
(let ((code #x0000))
  (dolist (acronym '("NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" "BEL"
                     "BS"   nil   nil  "VT"  "FF"  "CR"  "SO"  "SI"
                     "DLE" "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB"
                     "CAN" "EM"  "SUB" "ESC" "FS"  "GS"  "RS"  "US"))
    (aset char-acronym-table code acronym)
    (setq code (1+ code))))

;; C1 controls, U+0080..U+009F.  "XXX" marks positions with no
;; acronym.  Three entries are corrected to the ECMA-48 names:
;; U+008D is REVERSE INDEX "RI" (was "R1"), U+008F is SINGLE SHIFT
;; THREE "SS3" (was "SS1"), and U+009A is SINGLE CHARACTER INTRODUCER
;; "SCI" (was "SC1").
(let ((code #x0080))
  (dolist (acronym '("XXX" "XXX" "BPH" "NBH" "IND" "NEL" "SSA" "ESA"
                     "HTS" "HTJ" "VTS" "PLD" "PLU" "RI"  "SS2" "SS3"
                     "DCS" "PU1" "PU2" "STS" "CCH" "MW"  "SPA" "EPA"
                     "SOS" "XXX" "SCI" "CSI" "ST"  "OSC" "PM"  "APC"))
    (aset char-acronym-table code acronym)
    (setq code (1+ code))))
1423 | ||
;; Acronyms for individual non-graphic characters, as (CHAR . ACRONYM).
(dolist (entry
         '((#x17B4 . "KIVAQ")	; KHMER VOWEL INHERENT AQ
           (#x17B5 . "KIVAA")	; KHMER VOWEL INHERENT AA
           (#x200B . "ZWSP")	; ZERO WIDTH SPACE
           (#x200C . "ZWNJ")	; ZERO WIDTH NON-JOINER
           (#x200D . "ZWJ")	; ZERO WIDTH JOINER
           (#x200E . "LRM")	; LEFT-TO-RIGHT MARK
           (#x200F . "RLM")	; RIGHT-TO-LEFT MARK
           (#x202A . "LRE")	; LEFT-TO-RIGHT EMBEDDING
           (#x202B . "RLE")	; RIGHT-TO-LEFT EMBEDDING
           (#x202C . "PDF")	; POP DIRECTIONAL FORMATTING
           (#x202D . "LRO")	; LEFT-TO-RIGHT OVERRIDE
           (#x202E . "RLO")	; RIGHT-TO-LEFT OVERRIDE
           (#x2060 . "WJ")	; WORD JOINER
           (#x206A . "ISS")	; INHIBIT SYMMETRIC SWAPPING
           (#x206B . "ASS")	; ACTIVATE SYMMETRIC SWAPPING
           (#x206C . "IAFS")	; INHIBIT ARABIC FORM SHAPING
           (#x206D . "AAFS")	; ACTIVATE ARABIC FORM SHAPING
           (#x206E . "NADS")	; NATIONAL DIGIT SHAPES
           (#x206F . "NODS")	; NOMINAL DIGIT SHAPES
           (#xFEFF . "ZWNBSP")	; ZERO WIDTH NO-BREAK SPACE
           (#xFFF9 . "IAA")	; INTERLINEAR ANNOTATION ANCHOR
           (#xFFFA . "IAS")	; INTERLINEAR ANNOTATION SEPARATOR
           (#xFFFB . "IAT")	; INTERLINEAR ANNOTATION TERMINATOR
           (#x1D173 . "BEGBM")	; MUSICAL SYMBOL BEGIN BEAM
           (#x1D174 . "ENDBM")	; MUSICAL SYMBOL END BEAM
           (#x1D175 . "BEGTIE")	; MUSICAL SYMBOL BEGIN TIE
           ;; Was the bare "END", which did not say what ends; now it
           ;; parallels BEGTIE like the other BEGxxx/ENDxxx pairs.
           (#x1D176 . "ENDTIE")	; MUSICAL SYMBOL END TIE
           (#x1D177 . "BEGSLR")	; MUSICAL SYMBOL BEGIN SLUR
           (#x1D178 . "ENDSLR")	; MUSICAL SYMBOL END SLUR
           (#x1D179 . "BEGPHR")	; MUSICAL SYMBOL BEGIN PHRASE
           (#x1D17A . "ENDPHR")	; MUSICAL SYMBOL END PHRASE
           (#xE0001 . "|->TAG")	; LANGUAGE TAG
           (#xE0020 . "SP TAG")	; TAG SPACE
           (#xE007F . "->|TAG")))	; CANCEL TAG
  (aset char-acronym-table (car entry) (cdr entry)))

;; Tag characters U+E0021..U+E007E: show the corresponding ASCII
;; graphic character (33..126) followed by " TAG".
(dotimes (i 94)
  (aset char-acronym-table (+ #xE0021 i) (format " %c TAG" (+ 33 i))))
1460 | ||
(defun update-glyphless-char-display (&optional variable value)
  "Make the setting of `glyphless-char-display-control' take effect.
This function updates the char-table `glyphless-char-display'.

If VARIABLE is non-nil, its default value is first set to VALUE,
even when VALUE is nil, so that this function can serve as a
custom `:set' function.  VALUE is a list whose elements have the
form (GROUP . METHOD); see `glyphless-char-display-control' for
the valid GROUP and METHOD symbols.  Signal an error if an
element names an invalid GROUP or METHOD."
  ;; Key on VARIABLE rather than VALUE: an empty (nil) VALUE is a
  ;; legitimate setting that must still be stored, and a call that
  ;; passes no VARIABLE must not try to set the symbol nil.
  (when variable
    (set-default variable value))
  (dolist (elt value)
    (let ((target (car elt))
          (method (cdr elt)))
      (or (memq method '(zero-width thin-space empty-box acronym hex-code))
          (error "Invalid glyphless character display method: %s" method))
      (cond ((eq target 'c0-control)
             (glyphless-set-char-table-range glyphless-char-display
                                             #x00 #x1F method)
             ;; Users will not expect their newlines and TABs be
             ;; displayed as anything but themselves, so exempt those
             ;; two characters from c0-control.
             (set-char-table-range glyphless-char-display #x9 nil)
             (set-char-table-range glyphless-char-display #xa nil))
            ((eq target 'c1-control)
             (glyphless-set-char-table-range glyphless-char-display
                                             #x80 #x9F method))
            ((eq target 'format-control)
             (when unicode-category-table
               (map-char-table
                ;; CHAR is either a single character or a cons
                ;; (FROM . TO) describing a range; handle both.
                #'(lambda (char category)
                    (if (eq category 'Cf)
                        (let ((this-method method)
                              from to)
                          (if (consp char)
                              (setq from (car char) to (cdr char))
                            (setq from char to char))
                          (while (<= from to)
                            ;; U+00AD is skipped: it keeps whatever
                            ;; entry the table already has for it.
                            (when (/= from #xAD)
                              ;; For `acronym' the table entry is the
                              ;; per-character acronym string itself.
                              (if (eq method 'acronym)
                                  (setq this-method
                                        (aref char-acronym-table from)))
                              (set-char-table-range glyphless-char-display
                                                    from this-method))
                            (setq from (1+ from))))))
                unicode-category-table)))
            ((eq target 'no-font)
             ;; The `no-font' method lives in extra slot 0 of the
             ;; char-table rather than in a per-character entry.
             (set-char-table-extra-slot glyphless-char-display 0 method))
            (t
             (error "Invalid glyphless character group: %s" target))))))
bd3921f0 PS |
1505 | |
(defun glyphless-set-char-table-range (chartable from to method)
  "Set the entries of CHARTABLE for characters FROM through TO.
If METHOD is the symbol `acronym', give each character in the
range its own entry, taken from `char-acronym-table'.  For any
other METHOD, store METHOD itself for the whole range at once."
  (cond
   ((not (eq method 'acronym))
    (set-char-table-range chartable (cons from to) method))
   (t
    ;; Acronyms differ per character, so the range cannot be set in
    ;; a single call; walk it one code point at a time.
    (dotimes (offset (- (1+ to) from))
      (let ((code (+ from offset)))
        (set-char-table-range chartable code
                              (aref char-acronym-table code)))))))
b2cca856 | 1513 | |
0e7c0582 EZ |
1514 | ;;; Control of displaying glyphless characters. |
(defcustom glyphless-char-display-control
  '((format-control . thin-space)
    (no-font . hex-code))
  "List of directives to control display of glyphless characters.

Each element has the form (GROUP . METHOD), where GROUP is a
symbol specifying the character group, and METHOD is a symbol
specifying the method of displaying characters belonging to that
group.

GROUP must be one of these symbols:
  `c0-control':     U+0000..U+001F, but excluding newline and TAB.
  `c1-control':     U+0080..U+009F.
  `format-control': Characters of Unicode General Category `Cf',
                    such as U+200C (ZWNJ), U+200E (LRM), but
                    excluding characters that have graphic images,
                    such as U+00AD (SHY).
  `no-font':        characters for which no suitable font is found.
                    For character terminals, characters that cannot
                    be encoded by `terminal-coding-system'.

METHOD must be one of these symbols:
  `zero-width': don't display.
  `thin-space': display a thin (1-pixel width) space.  On character
                terminals, display as 1-character space.
  `empty-box':  display an empty box.
  `acronym':    display an acronym of the character in a box.  The
                acronym is taken from `char-acronym-table', which see.
  `hex-code':   display the hexadecimal character code in a box.

Do not set its value directly from Lisp; the value takes effect
only via a custom `:set' function (`update-glyphless-char-display'),
which updates `glyphless-char-display'."
  :version "24.1"
  :type '(alist :key-type (symbol :tag "Character Group")
                :value-type (symbol :tag "Display Method"))
  ;; Every group offers the same set of display methods, so build the
  ;; per-group entries from a single `choice' widget description.
  :options (let ((methods
                  '(choice (const :tag "Don't display" zero-width)
                           (const :tag "Display as thin space" thin-space)
                           (const :tag "Display as empty box" empty-box)
                           (const :tag "Display acronym" acronym)
                           (const :tag "Display hex code in a box" hex-code))))
             (list (list 'c0-control methods)
                   (list 'c1-control methods)
                   (list 'format-control methods)
                   (list 'no-font methods)))
  :set #'update-glyphless-char-display
  :group 'display)
1578 | ||
b2cca856 | 1579 | \f |
e7259832 KH |
1580 | ;;; Setting word boundary. |
1581 | ||
;; Both variables hold lists of (CATEGORY-BEFORE . CATEGORY-AFTER)
;; pairs.  NOTE(review): a nil member presumably stands for "any
;; category"; confirm against the variables' own documentation.
(setq word-combining-categories
      (list (cons nil ?^)
            (cons ?^ nil)
            (cons ?C ?H)
            (cons ?C ?K))

      word-separating-categories        ; (2-byte character sets)
      (list (cons ?H ?K)))              ; Hiragana - Katakana
1591 | ||
1cbfaab9 | 1592 | ;; Local Variables: |
985773c9 | 1593 | ;; coding: utf-8 |
1cbfaab9 | 1594 | ;; End: |
777cfce6 | 1595 | |
60370d40 | 1596 | ;;; characters.el ends here |