Commit | Line | Data |
---|---|---|
4ed46869 KH |
1 | ;;; characters.el --- set syntax and category for multibyte characters |
2 | ||
95df8112 | 3 | ;; Copyright (C) 1997, 2000-2011 Free Software Foundation, Inc. |
7976eda0 | 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
2fd125a3 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
8f924df7 | 8 | ;; Copyright (C) 2003 |
55bd52ea KH |
9 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
10 | ;; Registration Number H13PRO009 | |
4ed46869 KH |
11 | |
12 | ;; Keywords: multibyte character, character set, syntax, category | |
13 | ||
14 | ;; This file is part of GNU Emacs. | |
15 | ||
4936186e | 16 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 17 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
18 | ;; the Free Software Foundation, either version 3 of the License, or |
19 | ;; (at your option) any later version. | |
4ed46869 KH |
20 | |
21 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
22 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
24 | ;; GNU General Public License for more details. | |
25 | ||
26 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 27 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
28 | |
29 | ;;; Commentary: | |
30 | ||
60370d40 PJ |
31 | ;;; Code: |
32 | ||
4ed46869 KH |
33 | ;;; Predefined categories. |
34 | ||
35 | ;; For each character set. | |
36 | ||
46bf60bc KH |
37 | (define-category ?a "ASCII |
38 | ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])") | |
4ed46869 KH |
39 | (define-category ?l "Latin") |
40 | (define-category ?t "Thai") | |
41 | (define-category ?g "Greek") | |
42 | (define-category ?b "Arabic") | |
43 | (define-category ?w "Hebrew") | |
44 | (define-category ?y "Cyrillic") | |
46bf60bc KH |
45 | (define-category ?k "Katakana |
46 | Japanese katakana") | |
47 | (define-category ?r "Roman | |
48 | Japanese roman") | |
4ed46869 KH |
49 | (define-category ?c "Chinese") |
50 | (define-category ?j "Japanese") | |
51 | (define-category ?h "Korean") | |
46bf60bc KH |
52 | (define-category ?e "Ethiopic |
53 | Ethiopic (Ge'ez)") | |
54 | (define-category ?v "Viet | |
55 | Vietnamese") | |
4ed46869 | 56 | (define-category ?i "Indian") |
6eba8645 | 57 | (define-category ?o "Lao") |
9395eb7c | 58 | (define-category ?q "Tibetan") |
4ed46869 KH |
59 | |
60 | ;; For each group (row) of 2-byte character sets. | |
61 | ||
46bf60bc KH |
62 | (define-category ?A "2-byte alnum |
63 | Alpha-numeric characters of 2-byte character sets") | |
64 | (define-category ?C "2-byte han | |
65 | Chinese (Han) characters of 2-byte character sets") | |
66 | (define-category ?G "2-byte Greek | |
67 | Greek characters of 2-byte character sets") | |
68 | (define-category ?H "2-byte Hiragana | |
69 | Japanese Hiragana characters of 2-byte character sets") | |
70 | (define-category ?K "2-byte Katakana | |
71 | Japanese Katakana characters of 2-byte character sets") | |
72 | (define-category ?N "2-byte Korean | |
73 | Korean Hangul characters of 2-byte character sets") | |
91c491e0 | 74 | (define-category ?Y "2-byte Cyrillic |
46bf60bc | 75 | Cyrillic characters of 2-byte character sets") |
4ed46869 KH |
76 | (define-category ?I "Indian Glyphs") |
77 | ||
78 | ;; For phonetic classifications. | |
79 | ||
80 | (define-category ?0 "consonant") | |
46bf60bc | 81 | (define-category ?1 "base vowel |
4eb97232 | 82 | Base (independent) vowel") |
46bf60bc | 83 | (define-category ?2 "upper diacritic |
4eb97232 | 84 | Upper diacritical mark (including upper vowel)") |
46bf60bc | 85 | (define-category ?3 "lower diacritic |
4eb97232 | 86 | Lower diacritical mark (including lower vowel)") |
46bf60bc | 87 | (define-category ?4 "combining tone |
4eb97232 | 88 | Combining tone mark") |
9765a2ba | 89 | (define-category ?5 "symbol") |
4ed46869 | 90 | (define-category ?6 "digit") |
91c491e0 | 91 | (define-category ?7 "vowel diacritic |
4eb97232 | 92 | Vowel-modifying diacritical mark") |
6eba8645 KH |
93 | (define-category ?8 "vowel-signs") |
94 | (define-category ?9 "semivowel lower") | |
4ed46869 KH |
95 | |
96 | ;; For filling. | |
46bf60bc KH |
97 | (define-category ?| "line breakable |
98 | While filling, we can break a line at this character.") | |
4ed46869 | 99 | |
504af7b2 | 100 | ;; For indentation calculation. |
70ea295a | 101 | (define-category ?\s |
46bf60bc KH |
102 | "space for indent |
103 | This character counts as a space for indentation purposes.") | |
504af7b2 | 104 | |
94487c4e | 105 | ;; Keep the following for `kinsoku' processing. See comments in |
4ed46869 | 106 | ;; kinsoku.el. |
46bf60bc KH |
107 | (define-category ?> "Not at bol |
108 | A character which can't be placed at beginning of line.") | |
109 | (define-category ?< "Not at eol | |
110 | A character which can't be placed at end of line.") | |
4ed46869 | 111 | |
8ea6fa80 KH |
112 | ;; Base and Combining |
113 | (define-category ?. "Base | |
114 | Base characters (Unicode General Category L,N,P,S,Zs)") | |
46bf60bc | 115 | (define-category ?^ "Combining |
4eb97232 | 116 | Combining diacritic or mark (Unicode General Category M)") |
4ed46869 KH |
117 | \f |
118 | ;;; Setting syntax and category. | |
119 | ||
120 | ;; ASCII | |
121 | ||
e2cc40b7 KH |
;; Code points 32..127 carry both category `a' (ASCII) and category
;; `l' (Latin).
(let ((ascii-range '(32 . 127)))
  (dolist (category '(?a ?l))
    (modify-category-entry ascii-range category)))
4ed46869 | 125 | |
c94ae9eb DL |
126 | ;; Deal with the CJK charsets first. Since the syntax of blocks is |
127 | ;; defined per charset, and the charsets may contain e.g. Latin | |
128 | ;; characters, we end up with the wrong syntax definitions if we're | |
129 | ;; not careful. | |
4ed46869 | 130 | |
;; Chinese characters (Unicode)
;; Each entry is (RANGE CATEGORY...); every listed category is added to
;; every character of RANGE.
(dolist (spec '(((#x2E80 . #x312F) ?|)
                ((#x3190 . #x33FF) ?|)
                ((#x3400 . #x4DBF) ?C)
                ((#x4E00 . #x9FAF) ?C)
                ((#x3400 . #x9FAF) ?c ?|)
                ((#xF900 . #xFAFF) ?C ?c ?|)
                ((#x20000 . #x2FFFF) ?| ?C ?c)))
  (dolist (category (cdr spec))
    (modify-category-entry (car spec) category)))
8e4cd685 | 144 | |
4ed46869 KH |
145 | |
146 | ;; Chinese character set (GB2312) | |
147 | ||
66bff5ed KH |
148 | (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E) |
149 | (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E) | |
150 | (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E) | |
4ed46869 | 151 | |
87a39edb | 152 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c) |
9ad4b491 KH |
153 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339) |
154 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A) | |
155 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A) | |
66bff5ed KH |
156 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E) |
157 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E) | |
158 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E) | |
159 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E) | |
160 | (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E) | |
4ed46869 KH |
161 | |
162 | ;; Chinese character set (BIG5) | |
163 | ||
e7259832 | 164 | (map-charset-chars #'modify-category-entry 'big5 ?c) |
66a85e76 | 165 | (map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA261) |
9ad4b491 | 166 | (map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E) |
66a85e76 | 167 | (map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DC) |
4ed46869 KH |
168 | |
169 | ;; Chinese character set (CNS11643) | |
170 | ||
87a39edb DL |
171 | (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 |
172 | chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 | |
173 | chinese-cns11643-7)) | |
174 | (map-charset-chars #'modify-category-entry c ?c) | |
9ad4b491 KH |
175 | (if (eq c 'chinese-cns11643-1) |
176 | (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E) | |
8e4cd685 | 177 | (map-charset-chars #'modify-category-entry c ?C))) |
4ed46869 | 178 | |
8f924df7 | 179 | ;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213) |
4ed46869 | 180 | |
66bff5ed | 181 | (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k) |
4ed46869 | 182 | |
66bff5ed | 183 | (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r) |
4ed46869 | 184 | |
8f924df7 | 185 | (dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212 |
761f6427 KH |
186 | japanese-jisx0213-1 japanese-jisx0213-2 |
187 | cp932-2-byte)) | |
8e4cd685 | 188 | (map-charset-chars #'modify-category-entry l ?j)) |
4ed46869 | 189 | |
c4186f9c KH |
190 | ;; Fullwidth characters |
191 | (modify-category-entry '(#xff01 . #xff60) ?\|) | |
192 | ||
269a5dd0 | 193 | ;; Unicode equivalents of JISX0201-kana |
66bff5ed KH |
194 | (let ((range '(#xff61 . #xff9f))) |
195 | (modify-category-entry range ?k) | |
196 | (modify-category-entry range ?j) | |
197 | (modify-category-entry range ?\|)) | |
269a5dd0 DL |
198 | |
199 | ;; Katakana block | |
796f8b2f KH |
200 | (modify-category-entry '(#x3099 . #x309C) ?K) |
201 | (modify-category-entry '(#x30A0 . #x30FF) ?K) | |
6f3ac1e1 | 202 | (modify-category-entry '(#x31F0 . #x31FF) ?K) |
b11c2874 | 203 | (modify-category-entry '(#x30A0 . #x30FA) ?\|) |
796f8b2f | 204 | (modify-category-entry #x30FF ?\|) |
269a5dd0 DL |
205 | |
206 | ;; Hiragana block | |
796f8b2f KH |
207 | (modify-category-entry '(#x3040 . #x309F) ?H) |
208 | (modify-category-entry '(#x3040 . #x3096) ?\|) | |
209 | (modify-category-entry #x309F ?\|) | |
210 | (modify-category-entry #x30A0 ?H) | |
211 | (modify-category-entry #x30FC ?H) | |
212 | ||
269a5dd0 | 213 | |
4ed46869 | 214 | ;; JISX0208 |
66bff5ed KH |
215 | (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E) |
216 | (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E) | |
;; Give every one of these characters word syntax.  The syntax change
;; must be applied to the loop variable so that each listed character
;; is processed, not just the head of the list.
(dolist (ch '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))
  (modify-syntax-entry ch "w"))
66bff5ed KH |
220 | |
221 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E) | |
222 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E) | |
223 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E) | |
224 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E) | |
225 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E) | |
226 | (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E) | |
;; Extra categories for individual characters, as (CHAR CATEGORY...).
(dolist (entry '((?ー ?K)
                 (?゛ ?K ?H)
                 (?゜ ?K ?H)
                 (?仝 ?C)
                 (?々 ?C)
                 (?〆 ?C)
                 (?〇 ?C)))
  (dolist (category (cdr entry))
    (modify-category-entry (car entry) category)))
237 | ||
238 | ;; JISX0212 | |
4ed46869 | 239 | |
66bff5ed | 240 | (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E) |
4ed46869 KH |
241 | |
242 | ;; JISX0201-Kana | |
87a39edb | 243 | |
;; These three characters get punctuation syntax.
(mapc (lambda (ch) (modify-syntax-entry ch "."))
      '(?。 ?、 ?・))
248 | ||
e6d10035 KH |
;; The corner brackets are a matched pair: the left one is an opening
;; delimiter whose partner is the right one, and the right one is a
;; closing delimiter (class `)') whose partner is the left one.
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\」 ")「")
226e4119 | 251 | |
4ed46869 KH |
252 | ;; Korean character set (KSC5601) |
253 | ||
87a39edb | 254 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h) |
66bff5ed KH |
255 | |
256 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E) | |
439f7264 DL |
257 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E) |
258 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E) | |
259 | (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E) | |
9ad4b491 KH |
260 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339) |
261 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A) | |
262 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A) | |
66bff5ed KH |
263 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E) |
264 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E) | |
265 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E) | |
266 | (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E) | |
4ed46869 | 267 | |
c94ae9eb | 268 | ;; These are in more than one charset. |
8f924df7 KH |
;; The string lists delimiters as OPEN CLOSE OPEN CLOSE ...; each
;; opener is matched with the closer that immediately follows it, and
;; vice versa.  A trailing unpaired character would be ignored.
(let ((pending (append (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
                               "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
                               "()[]{}")
                       nil)))
  (while (cdr pending)
    (let ((opener (car pending))
          (closer (cadr pending)))
      (modify-syntax-entry opener (string ?\( closer))
      (modify-syntax-entry closer (string ?\) opener)))
    (setq pending (cddr pending))))
d05cfa1f | 278 | |
c94ae9eb | 279 | ;; Arabic character set |
6eba8645 | 280 | |
c94ae9eb DL |
;; Category `b' goes to every character of the listed Arabic charsets
;; and to the three Unicode ranges below.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (map-charset-chars #'modify-category-entry charset ?b))
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (modify-category-entry range ?b))
6eba8645 | 291 | |
c94ae9eb DL |
292 | ;; Cyrillic character set (ISO-8859-5) |
293 | ||
294 | (modify-syntax-entry ?№ ".") | |
295 | ||
296 | ;; Ethiopic character set | |
297 | ||
4c81b0f6 KH |
;; Category `e' for two Unicode ranges, punctuation syntax for the
;; listed marks, then category `e' for the whole `ethiopic' charset.
(dolist (range '((#x1200 . #x1399)
                 (#x2d80 . #x2dde)))
  (modify-category-entry range ?e))
(dolist (mark '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨))
  (modify-syntax-entry mark "."))
(map-charset-chars #'modify-category-entry 'ethiopic ?e)
305 | ||
306 | ;; Hebrew character set (ISO-8859-8) | |
307 | ||
;; Hebrew marks that get punctuation syntax.
(dolist (code '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry code "."))
313 | ||
314 | ;; Indian character set (IS 13194 and other Emacs original Indian charsets) | |
315 | ||
316 | (modify-category-entry '(#x901 . #x970) ?i) | |
317 | (map-charset-chars #'modify-category-entry 'indian-is13194 ?i) | |
318 | (map-charset-chars #'modify-category-entry 'indian-2-column ?i) | |
d05cfa1f | 319 | |
6eba8645 KH |
320 | ;; Lao character set |
321 | ||
abdaa411 DL |
322 | (modify-category-entry '(#xe80 . #xeff) ?o) |
323 | (map-charset-chars #'modify-category-entry 'lao ?o) | |
6eba8645 | 324 | |
abdaa411 | 325 | (let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant |
e6d10035 KH |
326 | ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base |
327 | ("ັິ-ືົໍ" "w" ?2) ; vowel upper | |
328 | ("ຸູ" "w" ?3) ; vowel lower | |
8f924df7 | 329 | ("່-໋" "w" ?4) ; tone mark |
e6d10035 KH |
330 | ("ຼຽ" "w" ?9) ; semivowel lower |
331 | ("໐-໙" "w" ?6) ; digit | |
332 | ("ຯໆ" "_" ?5) ; symbol | |
6eba8645 KH |
333 | )) |
334 | elm chars len syntax category to ch i) | |
335 | (while deflist | |
336 | (setq elm (car deflist)) | |
337 | (setq chars (car elm) | |
338 | len (length chars) | |
339 | syntax (nth 1 elm) | |
340 | category (nth 2 elm) | |
341 | i 0) | |
342 | (while (< i len) | |
343 | (if (= (aref chars i) ?-) | |
344 | (setq i (1+ i) | |
4a027a0d KH |
345 | to (aref chars i)) |
346 | (setq ch (aref chars i) | |
6eba8645 KH |
347 | to ch)) |
348 | (while (<= ch to) | |
269a5dd0 DL |
349 | (unless (string-equal syntax "w") |
350 | (modify-syntax-entry ch syntax)) | |
6eba8645 KH |
351 | (modify-category-entry ch category) |
352 | (setq ch (1+ ch))) | |
4a027a0d | 353 | (setq i (1+ i))) |
6eba8645 KH |
354 | (setq deflist (cdr deflist)))) |
355 | ||
4ed46869 KH |
356 | ;; Thai character set (TIS620) |
357 | ||
abdaa411 DL |
358 | (modify-category-entry '(#xe00 . #xe7f) ?t) |
359 | (map-charset-chars #'modify-category-entry 'thai-tis620 ?t) | |
4ed46869 KH |
360 | |
361 | (let ((deflist '(;; chars syntax category | |
e6d10035 KH |
362 | ("ก-รลว-ฮ" "w" ?0) ; consonant |
363 | ("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base | |
364 | ("ัิ-ื็๎" "w" ?2) ; vowel upper | |
365 | ("ุ-ฺ" "w" ?3) ; vowel lower | |
8f924df7 | 366 | ("่-ํ" "w" ?4) ; tone mark |
e6d10035 KH |
367 | ("๐-๙" "w" ?6) ; digit |
368 | ("ฯๆ฿๏๚๛" "_" ?5) ; symbol | |
4ed46869 KH |
369 | )) |
370 | elm chars len syntax category to ch i) | |
9395eb7c KH |
371 | (while deflist |
372 | (setq elm (car deflist)) | |
373 | (setq chars (car elm) | |
374 | len (length chars) | |
375 | syntax (nth 1 elm) | |
376 | category (nth 2 elm) | |
377 | i 0) | |
378 | (while (< i len) | |
379 | (if (= (aref chars i) ?-) | |
380 | (setq i (1+ i) | |
4a027a0d KH |
381 | to (aref chars i)) |
382 | (setq ch (aref chars i) | |
9395eb7c KH |
383 | to ch)) |
384 | (while (<= ch to) | |
269a5dd0 DL |
385 | (unless (string-equal syntax "w") |
386 | (modify-syntax-entry ch syntax)) | |
9395eb7c KH |
387 | (modify-category-entry ch category) |
388 | (setq ch (1+ ch))) | |
4a027a0d | 389 | (setq i (1+ i))) |
9395eb7c KH |
390 | (setq deflist (cdr deflist)))) |
391 | ||
392 | ;; Tibetan character set | |
393 | ||
abdaa411 DL |
394 | (modify-category-entry '(#xf00 . #xfff) ?q) |
395 | (map-charset-chars #'modify-category-entry 'tibetan ?q) | |
396 | (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q) | |
9395eb7c KH |
397 | |
398 | (let ((deflist '(;; chars syntax category | |
725d7c92 | 399 | ("ཀ-ཀྵཪ" "w" ?0) ; consonant |
55a3ed16 | 400 | ("ྐ-ྐྵྺྻྼ" "w" ?0) ; |
725d7c92 DL |
401 | ("ིེཻོཽྀ" "w" ?2) ; upper vowel |
402 | ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier | |
55a3ed16 | 403 | ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier |
8f924df7 | 404 | ("" "w" ?3) ; invisible vowel a |
725d7c92 DL |
405 | ("༠-༩༪-༳" "w" ?6) ; digit |
406 | ("་།-༒༔ཿ" "." ?|) ; line-break char | |
407 | ("་།༏༐༑༔ཿ" "." ?|) ; | |
408 | ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition | |
409 | ("་།༏༐༑༔ཿ" "." ?>) ; | |
410 | ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition | |
411 | ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others | |
9395eb7c KH |
412 | )) |
413 | elm chars len syntax category to ch i) | |
4ed46869 KH |
414 | (while deflist |
415 | (setq elm (car deflist)) | |
416 | (setq chars (car elm) | |
417 | len (length chars) | |
418 | syntax (nth 1 elm) | |
419 | category (nth 2 elm) | |
420 | i 0) | |
421 | (while (< i len) | |
422 | (if (= (aref chars i) ?-) | |
423 | (setq i (1+ i) | |
4a027a0d KH |
424 | to (aref chars i)) |
425 | (setq ch (aref chars i) | |
4ed46869 KH |
426 | to ch)) |
427 | (while (<= ch to) | |
269a5dd0 DL |
428 | (unless (string-equal syntax "w") |
429 | (modify-syntax-entry ch syntax)) | |
4ed46869 KH |
430 | (modify-category-entry ch category) |
431 | (setq ch (1+ ch))) | |
4a027a0d | 432 | (setq i (1+ i))) |
4ed46869 KH |
433 | (setq deflist (cdr deflist)))) |
434 | ||
435 | ;; Vietnamese character set | |
436 | ||
abdaa411 DL |
437 | ;; To make a word with Latin characters |
438 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l) | |
439 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v) | |
440 | ||
441 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l) | |
442 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v) | |
4ed46869 | 443 | |
e5dd1155 KH |
;; For every code 32..127, pair the `vietnamese-viscii-upper' character
;; with the `vietnamese-viscii-lower' character at the same code in the
;; standard case table.  When a character has a `ucs' code, that code
;; is also put in category `v'.
(let ((case-tbl (standard-case-table)))
  (dotimes (offset 96)
    (let* ((code (+ 32 offset))
           (upper (decode-char 'vietnamese-viscii-upper code))
           (lower (decode-char 'vietnamese-viscii-lower code))
           (upper-ucs (encode-char upper 'ucs))
           (lower-ucs (encode-char lower 'ucs)))
      (set-case-syntax-pair upper lower case-tbl)
      (when upper-ucs
        (modify-category-entry upper-ucs ?v))
      (when lower-ucs
        (modify-category-entry lower-ucs ?v)))))
456 | ||
d807d0c7 KH |
457 | ;; Tai Viet |
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS is either a cons
;; (FROM . TO), which is passed on as a single range, or a string whose
;; characters are handled one at a time.
(dolist (entry '(((?ꪀ . ?ꪯ) "w" ?0)     ; consonant
                 ("ꪱꪵꪶ" "w" ?1)         ; vowel base
                 ((?ꪹ . ?ꪽ) "w" ?1)     ; vowel base
                 ("ꪰꪲꪳꪷꪸꪾ" "w" ?2)      ; vowel upper
                 ("ꪴ" "w" ?3)           ; vowel lower
                 ("ꫀꫂ" "w" ?1)          ; non-combining tone-mark
                 ("꪿꫁" "w" ?4)          ; combining tone-mark
                 ((?ꫛ . ?꫟) "_" ?5)))   ; symbol
  (let ((targets (nth 0 entry))
        (syntax (nth 1 entry))
        (category (nth 2 entry)))
    (dolist (target (if (consp targets)
                        (list targets)
                      (append targets nil)))
      (modify-syntax-entry target syntax)
      (modify-category-entry target category))))
c94ae9eb DL |
480 | |
481 | ;; Latin | |
482 | ||
483 | (modify-category-entry '(#x80 . #x024F) ?l) | |
d05cfa1f | 484 | |
85ef8ece KH |
485 | (let ((tbl (standard-case-table)) c) |
486 | ||
4fb82d62 DL |
487 | ;; Latin-1 |
488 | ||
489 | ;; Fixme: Some of the non-word syntaxes here perhaps should be | |
490 | ;; reviewed. (Note that the following all implicitly have word | |
491 | ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.) There should be a well-defined way of | |
492 | ;; relating Unicode categories to Emacs syntax codes. | |
db92e81e KH |
493 | |
;; NBSP isn't semantically interchangeable with other whitespace chars,
;; so it's more like punctuation.
;; NBSP (#xA0) and the soft hyphen (#xAD) are written as code points:
;; their literal forms are invisible in source and are easily turned
;; into a plain space by editors and conversion tools.
(set-case-syntax #xA0 "." tbl)          ; NO-BREAK SPACE
(set-case-syntax ?¡ "." tbl)
(set-case-syntax ?¦ "_" tbl)
(set-case-syntax ?§ "." tbl)
(set-case-syntax ?© "_" tbl)
(set-case-syntax-delims 171 187 tbl)    ; « »
(set-case-syntax ?¬ "_" tbl)
(set-case-syntax #xAD "_" tbl)          ; SOFT HYPHEN
(set-case-syntax ?® "_" tbl)
(set-case-syntax ?° "_" tbl)
(set-case-syntax ?± "_" tbl)
(set-case-syntax ?µ "_" tbl)
(set-case-syntax ?· "_" tbl)
(set-case-syntax ?¼ "_" tbl)
(set-case-syntax ?½ "_" tbl)
(set-case-syntax ?¾ "_" tbl)
(set-case-syntax ?¿ "." tbl)
;; Pair each code point 192..222 with the code point 32 above it.
(let ((c 192))
  (while (<= c 222)
    (set-case-syntax-pair c (+ c 32) tbl)
    (setq c (1+ c))))
(set-case-syntax ?× "_" tbl)
(set-case-syntax ?ß "w" tbl)
(set-case-syntax ?÷ "_" tbl)
;; See below for ÿ.
85ef8ece | 521 | |
85ef8ece KH |
;; Latin Extended-A, Latin Extended-B
;; Every character from #x0100 through #x02B8 is in category `l'.
;; `modify-category-entry' takes the whole range as a cons.
(modify-category-entry '(#x0100 . #x02B8) ?l)
2bb915b8 | 527 | |
e5e381c8 KH |
528 | (let ((pair-ranges '((#x0100 . #x012F) |
529 | (#x0132 . #x0137) | |
530 | (#x0139 . #x0148) | |
531 | (#x014a . #x0177) | |
532 | (#x0179 . #x017E) | |
533 | (#x0182 . #x0185) | |
796f8b2f KH |
534 | (#x0187 . #x0188) |
535 | (#x018B . #x018C) | |
e5e381c8 KH |
536 | (#x0191 . #x0192) |
537 | (#x0198 . #x0199) | |
538 | (#x01A0 . #x01A5) | |
539 | (#x01A7 . #x01A8) | |
540 | (#x01AC . #x01AD) | |
541 | (#x01AF . #x01B0) | |
542 | (#x01B3 . #x01B6) | |
543 | (#x01BC . #x01BD) | |
544 | (#x01CD . #x01DC) | |
545 | (#x01DE . #x01EF) | |
546 | (#x01F4 . #x01F5) | |
547 | (#x01F8 . #x021F) | |
548 | (#x0222 . #x0233) | |
549 | (#x023B . #x023C) | |
550 | (#x0241 . #x0242) | |
551 | (#x0246 . #x024F)))) | |
552 | (dolist (elt pair-ranges) | |
553 | (let ((from (car elt)) (to (cdr elt))) | |
554 | (while (< from to) | |
555 | (set-case-syntax-pair from (1+ from) tbl) | |
556 | (setq from (+ from 2)))))) | |
2bb915b8 | 557 | |
796f8b2f KH |
558 | (set-case-syntax-pair #x189 #x256 tbl) |
559 | (set-case-syntax-pair #x18A #x257 tbl) | |
560 | ||
2bb915b8 KH |
561 | ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I |
562 | ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so | |
563 | ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN | |
564 | ;; SMALL LETTER I. | |
565 | ||
566 | ;; We used to set up half of those correspondences unconditionally, | |
567 | ;; but that makes searches slow. So now we don't set up either half | |
568 | ;; of these correspondences by default. | |
569 | ||
570 | ;; (set-downcase-syntax ?İ ?i tbl) | |
571 | ;; (set-upcase-syntax ?I ?ı tbl) | |
572 | ||
e6d10035 KH |
573 | (set-case-syntax-pair ?DŽ ?dž tbl) |
574 | (set-case-syntax-pair ?Dž ?dž tbl) | |
575 | (set-case-syntax-pair ?LJ ?lj tbl) | |
576 | (set-case-syntax-pair ?Lj ?lj tbl) | |
577 | (set-case-syntax-pair ?NJ ?nj tbl) | |
578 | (set-case-syntax-pair ?Nj ?nj tbl) | |
e5e381c8 | 579 | |
269a5dd0 | 580 | ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON |
e6d10035 KH |
581 | (set-case-syntax-pair ?DZ ?dz tbl) |
582 | (set-case-syntax-pair ?Dz ?dz tbl) | |
e6d10035 KH |
583 | (set-case-syntax-pair ?Ƕ ?ƕ tbl) |
584 | (set-case-syntax-pair ?Ƿ ?ƿ tbl) | |
269a5dd0 | 585 | |
;; Latin Extended Additional
(modify-category-entry '(#x1e00 . #x1ef9) ?l)
;; Visit the even code points only.  Each one is paired with its odd
;; successor, except for those strictly between #x1e94 and #x1ea0.
(setq c #x1e00)
(while (<= c #x1ef9)
  (unless (and (> c #x1e94) (< c #x1ea0))
    (set-case-syntax-pair c (1+ c) tbl))
  (setq c (+ c 2)))
594 | ||
85ef8ece | 595 | ;; Greek |
abdaa411 | 596 | (modify-category-entry '(#x0370 . #x03ff) ?g) |
85ef8ece | 597 | (setq c #x0370) |
d05cfa1f | 598 | (while (<= c #x03ff) |
d05cfa1f KH |
599 | (if (or (and (>= c #x0391) (<= c #x03a1)) |
600 | (and (>= c #x03a3) (<= c #x03ab))) | |
abdaa411 | 601 | (set-case-syntax-pair c (+ c 32) tbl)) |
d05cfa1f KH |
602 | (and (>= c #x03da) |
603 | (<= c #x03ee) | |
604 | (zerop (% c 2)) | |
abdaa411 | 605 | (set-case-syntax-pair c (1+ c) tbl)) |
d05cfa1f | 606 | (setq c (1+ c))) |
e6d10035 KH |
607 | (set-case-syntax-pair ?Ά ?ά tbl) |
608 | (set-case-syntax-pair ?Έ ?έ tbl) | |
609 | (set-case-syntax-pair ?Ή ?ή tbl) | |
610 | (set-case-syntax-pair ?Ί ?ί tbl) | |
611 | (set-case-syntax-pair ?Ό ?ό tbl) | |
612 | (set-case-syntax-pair ?Ύ ?ύ tbl) | |
613 | (set-case-syntax-pair ?Ώ ?ώ tbl) | |
d05cfa1f | 614 | |
269a5dd0 DL |
615 | ;; Armenian |
616 | (setq c #x531) | |
617 | (while (<= c #x556) | |
abdaa411 | 618 | (set-case-syntax-pair c (+ c #x30) tbl) |
269a5dd0 DL |
619 | (setq c (1+ c))) |
620 | ||
85ef8ece | 621 | ;; Greek Extended |
abdaa411 | 622 | (modify-category-entry '(#x1f00 . #x1fff) ?g) |
85ef8ece | 623 | (setq c #x1f00) |
d05cfa1f | 624 | (while (<= c #x1fff) |
d05cfa1f KH |
625 | (and (<= (logand c #x000f) 7) |
626 | (<= c #x1fa7) | |
796f8b2f KH |
627 | (not (memq c '(#x1f16 #x1f17 #x1f56 #x1f57 |
628 | #x1f50 #x1f52 #x1f54 #x1f56))) | |
629 | (/= (logand c #x00f0) #x70) | |
abdaa411 | 630 | (set-case-syntax-pair (+ c 8) c tbl)) |
d05cfa1f | 631 | (setq c (1+ c))) |
e6d10035 KH |
632 | (set-case-syntax-pair ?Ᾰ ?ᾰ tbl) |
633 | (set-case-syntax-pair ?Ᾱ ?ᾱ tbl) | |
634 | (set-case-syntax-pair ?Ὰ ?ὰ tbl) | |
635 | (set-case-syntax-pair ?Ά ?ά tbl) | |
636 | (set-case-syntax-pair ?ᾼ ?ᾳ tbl) | |
637 | (set-case-syntax-pair ?Ὲ ?ὲ tbl) | |
638 | (set-case-syntax-pair ?Έ ?έ tbl) | |
639 | (set-case-syntax-pair ?Ὴ ?ὴ tbl) | |
640 | (set-case-syntax-pair ?Ή ?ή tbl) | |
641 | (set-case-syntax-pair ?ῌ ?ῃ tbl) | |
642 | (set-case-syntax-pair ?Ῐ ?ῐ tbl) | |
643 | (set-case-syntax-pair ?Ῑ ?ῑ tbl) | |
644 | (set-case-syntax-pair ?Ὶ ?ὶ tbl) | |
645 | (set-case-syntax-pair ?Ί ?ί tbl) | |
646 | (set-case-syntax-pair ?Ῠ ?ῠ tbl) | |
647 | (set-case-syntax-pair ?Ῡ ?ῡ tbl) | |
648 | (set-case-syntax-pair ?Ὺ ?ὺ tbl) | |
649 | (set-case-syntax-pair ?Ύ ?ύ tbl) | |
650 | (set-case-syntax-pair ?Ῥ ?ῥ tbl) | |
651 | (set-case-syntax-pair ?Ὸ ?ὸ tbl) | |
652 | (set-case-syntax-pair ?Ό ?ό tbl) | |
653 | (set-case-syntax-pair ?Ὼ ?ὼ tbl) | |
654 | (set-case-syntax-pair ?Ώ ?ώ tbl) | |
655 | (set-case-syntax-pair ?ῼ ?ῳ tbl) | |
d05cfa1f | 656 | |
;; cyrillic
(modify-category-entry '(#x0400 . #x04FF) ?y)
;; The three rules cover disjoint parts of #x0400..#x04ff, so at most
;; one of them applies to any given code point.
(setq c #x0400)
(while (<= c #x04ff)
  (cond ((<= c #x040f)
         (set-case-syntax-pair c (+ c 80) tbl))
        ((<= c #x042f)
         (set-case-syntax-pair c (+ c 32) tbl))
        ((and (= (% c 2) 0)
              (or (and (>= c #x0460) (<= c #x0480))
                  (and (>= c #x048c) (<= c #x04be))
                  (and (>= c #x04d0) (<= c #x04f4))))
         (set-case-syntax-pair c (1+ c) tbl)))
  (setq c (1+ c)))
;; Individual pairs, as (UPPER . LOWER).
(dolist (pair '((?Ӂ . ?ӂ)
                (?Ӄ . ?ӄ)
                (?Ӈ . ?ӈ)
                (?Ӌ . ?ӌ)
                (?Ӹ . ?ӹ)))
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
d05cfa1f | 678 | |
85ef8ece KH |
;; general punctuation
;; Consecutive sub-ranges starting at #x2000, as (LAST SYNTAX).  Each
;; sub-range begins where the previous one stopped, so `c' simply keeps
;; counting upward across entries.
(setq c #x2000)
(dolist (span '((#x200b " ")
                (#x200F ".")
                (#x2016 "_")            ; Fixme: These aren't all right.
                (#x201f ".")            ; quotation marks (like `)
                (#x2027 "_")            ; Fixme: These aren't all right.
                (#x206F ".")))
  (while (<= c (car span))
    (set-case-syntax c (cadr span) tbl)
    (setq c (1+ c))))
d05cfa1f | 703 | |
269a5dd0 DL |
704 | ;; Roman numerals |
705 | (setq c #x2160) | |
706 | (while (<= c #x216f) | |
abdaa411 | 707 | (set-case-syntax-pair c (+ c #x10) tbl) |
269a5dd0 DL |
708 | (setq c (1+ c))) |
709 | ||
4fb82d62 DL |
710 | ;; Fixme: The following blocks might be better as symbol rather than |
711 | ;; punctuation. | |
b427c97e DL |
712 | ;; Arrows |
713 | (setq c #x2190) | |
6ca54a3a DL |
714 | (while (<= c #x21FF) |
715 | (set-case-syntax c "." tbl) | |
b427c97e DL |
716 | (setq c (1+ c))) |
717 | ;; Mathematical Operators | |
718 | (while (<= c #x22FF) | |
6ca54a3a | 719 | (set-case-syntax c "." tbl) |
b427c97e DL |
720 | (setq c (1+ c))) |
721 | ;; Miscellaneous Technical | |
722 | (while (<= c #x23FF) | |
6ca54a3a | 723 | (set-case-syntax c "." tbl) |
b427c97e DL |
724 | (setq c (1+ c))) |
725 | ;; Control Pictures | |
726 | (while (<= c #x243F) | |
6ca54a3a | 727 | (set-case-syntax c "_" tbl) |
269a5dd0 DL |
728 | (setq c (1+ c))) |
729 | ||
730 | ;; Circled Latin | |
731 | (setq c #x24b6) | |
732 | (while (<= c #x24cf) | |
abdaa411 DL |
733 | (set-case-syntax-pair c (+ c 26) tbl) |
734 | (modify-category-entry c ?l) | |
735 | (modify-category-entry (+ c 26) ?l) | |
269a5dd0 DL |
736 | (setq c (1+ c))) |
737 | ||
738 | ;; Fullwidth Latin | |
739 | (setq c #xff21) | |
740 | (while (<= c #xff3a) | |
abdaa411 DL |
741 | (set-case-syntax-pair c (+ c #x20) tbl) |
742 | (modify-category-entry c ?l) | |
743 | (modify-category-entry (+ c #x20) ?l) | |
269a5dd0 DL |
744 | (setq c (1+ c))) |
745 | ||
269a5dd0 | 746 | ;; Combining diacritics |
abdaa411 | 747 | (modify-category-entry '(#x300 . #x362) ?^) |
269a5dd0 | 748 | ;; Combining marks |
abdaa411 | 749 | (modify-category-entry '(#x20d0 . #x20e3) ?^) |
269a5dd0 DL |
750 | |
751 | ;; Fixme: syntax for symbols &c | |
752 | ) | |
6b61353c KH |
753 | |
;; Give matching bracket characters open/close parenthesis syntax.
;; Each entry is (OPEN . CLOSE), written as code points rather than
;; literal characters: several of these characters have canonical or
;; compatibility equivalents (U+2329 vs U+3008, U+FF08 vs ASCII `(',
;; U+FF5F vs U+2985, ...), so a literal silently turns into a different
;; pair if the file is ever Unicode-normalized.
(let ((pairs
       '((#x2045 . #x2046) (#x207D . #x207E) (#x208D . #x208E)
         (#x2329 . #x232A) (#x23B4 . #x23B5) (#x2768 . #x2769)
         (#x276A . #x276B) (#x276C . #x276D) (#x2770 . #x2771)
         (#x2772 . #x2773) (#x2774 . #x2775) (#x27E6 . #x27E7)
         (#x27E8 . #x27E9) (#x27EA . #x27EB) (#x2983 . #x2984)
         (#x2985 . #x2986) (#x2987 . #x2988) (#x2989 . #x298A)
         (#x298B . #x298C) (#x298D . #x298E) (#x298F . #x2990)
         (#x2991 . #x2992) (#x2993 . #x2994) (#x2995 . #x2996)
         (#x2997 . #x2998) (#x29FC . #x29FD) (#x3008 . #x3009)
         (#x300A . #x300B) (#x300C . #x300D) (#x300E . #x300F)
         (#x3010 . #x3011) (#x3014 . #x3015) (#x3016 . #x3017)
         (#x3018 . #x3019) (#x301A . #x301B) (#xFD3E . #xFD3F)
         (#xFE35 . #xFE36) (#xFE37 . #xFE38) (#xFE39 . #xFE3A)
         (#xFE3B . #xFE3C) (#xFE3D . #xFE3E) (#xFE3F . #xFE40)
         (#xFE41 . #xFE42) (#xFE43 . #xFE44) (#xFE59 . #xFE5A)
         (#xFE5B . #xFE5C) (#xFE5D . #xFE5E) (#xFF08 . #xFF09)
         (#xFF3B . #xFF3D) (#xFF5B . #xFF5D) (#xFF5F . #xFF60)
         (#xFF62 . #xFF63))))
  (dolist (pair pairs)
    (let ((open (car pair))
          (close (cdr pair)))
      ;; "(X" = open paren matching X; ")X" = close paren matching X.
      (modify-syntax-entry open (string ?\( close))
      (modify-syntax-entry close (string ?\) open)))))
811 | ||
4ed46869 | 812 | \f |
777cfce6 | 813 | ;; For each character set, put the information of the most proper |
aaa9f206 | 814 | ;; coding system to encode it by `preferred-coding-system' property. |
777cfce6 | 815 | |
abdaa411 | 816 | ;; Fixme: should this be junked? |
777cfce6 KH |
;; Record, for each charset in the table, the coding system stored
;; under its `preferred-coding-system' property.  Entries are
;; (CHARSET . CODING-SYSTEM) and are applied in order.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . chinese-iso-8bit)
                 (chinese-gbk . chinese-gbk)
                 (gb18030-2-byte . chinese-gb18030)
                 (gb18030-4-byte-bmp . chinese-gb18030)
                 (gb18030-4-byte-smp . chinese-gb18030)
                 (gb18030-4-byte-ext-1 . chinese-gb18030)
                 (gb18030-4-byte-ext-2 . chinese-gb18030)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
df0415c5 KH |
868 | |
869 | \f | |
98a663f1 | 870 | ;; Setup auto-fill-chars for charsets that should invoke auto-filling. |
7760ba82 | 871 | ;; SPACE and NEWLINE are already set. |
df21429c KH |
872 | |
;; Mark each of these code-point ranges in `auto-fill-chars'.
(dolist (range '((#x3041 . #x30FF)
                 (#x3400 . #x4DB5)
                 (#x4e00 . #x9fbb)
                 (#xF900 . #xFAFF)
                 (#xFF00 . #xFF9F)
                 (#x20000 . #x2FFFF)))
  (set-char-table-range auto-fill-chars range t))
879 | ||
55bd52ea | 880 | \f |
7760ba82 KH |
881 | ;;; Setting char-width-table. The default is 1. |
882 | ||
883 | ;; 0: non-spacing, enclosing combining, formatting, Hangul Jamo medial | |
884 | ;; and final characters. | |
;; Width 0: every code-point range below is given display width 0 in
;; `char-width-table'.  Each entry is (FROM . TO), inclusive.
(mapc
 (lambda (range)
   (set-char-table-range char-width-table range 0))
 '((#x0300 . #x036F) (#x0483 . #x0489) (#x0591 . #x05BD) (#x05BF . #x05BF)
   (#x05C1 . #x05C2) (#x05C4 . #x05C5) (#x05C7 . #x05C7) (#x0600 . #x0603)
   (#x0610 . #x0615) (#x064B . #x065E) (#x0670 . #x0670) (#x06D6 . #x06E4)
   (#x06E7 . #x06E8) (#x06EA . #x06ED) (#x070F . #x070F) (#x0711 . #x0711)
   (#x0730 . #x074A) (#x07A6 . #x07B0) (#x07EB . #x07F3) (#x0901 . #x0902)
   (#x093C . #x093C) (#x0941 . #x0948) (#x094D . #x094D) (#x0951 . #x0954)
   (#x0962 . #x0963) (#x0981 . #x0981) (#x09BC . #x09BC) (#x09C1 . #x09C4)
   (#x09CD . #x09CD) (#x09E2 . #x09E3) (#x0A01 . #x0A02) (#x0A3C . #x0A3C)
   (#x0A41 . #x0A4D) (#x0A70 . #x0A71) (#x0A81 . #x0A82) (#x0ABC . #x0ABC)
   (#x0AC1 . #x0AC8) (#x0ACD . #x0ACD) (#x0AE2 . #x0AE3) (#x0B01 . #x0B01)
   (#x0B3C . #x0B3C) (#x0B3F . #x0B3F) (#x0B41 . #x0B43) (#x0B4D . #x0B56)
   (#x0B82 . #x0B82) (#x0BC0 . #x0BC0) (#x0BCD . #x0BCD) (#x0C3E . #x0C40)
   (#x0C46 . #x0C56) (#x0CBC . #x0CBC) (#x0CBF . #x0CBF) (#x0CC6 . #x0CC6)
   (#x0CCC . #x0CCD) (#x0CE2 . #x0CE3) (#x0D41 . #x0D43) (#x0D4D . #x0D4D)
   (#x0DCA . #x0DCA) (#x0DD2 . #x0DD6) (#x0E31 . #x0E31) (#x0E34 . #x0E3A)
   (#x0E47 . #x0E4E) (#x0EB1 . #x0EB1) (#x0EB4 . #x0EBC) (#x0EC8 . #x0ECD)
   (#x0F18 . #x0F19) (#x0F35 . #x0F35) (#x0F37 . #x0F37) (#x0F39 . #x0F39)
   (#x0F71 . #x0F7E) (#x0F80 . #x0F84) (#x0F86 . #x0F87) (#x0F90 . #x0FBC)
   (#x0FC6 . #x0FC6) (#x102D . #x1030) (#x1032 . #x1037) (#x1039 . #x1039)
   (#x1058 . #x1059) (#x1160 . #x11FF) (#x135F . #x135F) (#x1712 . #x1714)
   (#x1732 . #x1734) (#x1752 . #x1753) (#x1772 . #x1773) (#x17B4 . #x17B5)
   (#x17B7 . #x17BD) (#x17C6 . #x17C6) (#x17C9 . #x17D3) (#x17DD . #x17DD)
   (#x180B . #x180D) (#x18A9 . #x18A9) (#x1920 . #x1922) (#x1927 . #x1928)
   (#x1932 . #x1932) (#x1939 . #x193B) (#x1A17 . #x1A18) (#x1B00 . #x1B03)
   (#x1B34 . #x1B34) (#x1B36 . #x1B3A) (#x1B3C . #x1B3C) (#x1B42 . #x1B42)
   (#x1B6B . #x1B73) (#x1DC0 . #x1DFF) (#x200B . #x200F) (#x202A . #x202E)
   (#x2060 . #x206F) (#x20D0 . #x20EF) (#x302A . #x302F) (#x3099 . #x309A)
   (#xA806 . #xA806) (#xA80B . #xA80B) (#xA825 . #xA826) (#xFB1E . #xFB1E)
   (#xFE00 . #xFE0F) (#xFE20 . #xFE23) (#xFEFF . #xFEFF) (#xFFF9 . #xFFFB)
   (#x10A01 . #x10A0F) (#x10A38 . #x10A3F) (#x1D167 . #x1D169)
   (#x1D173 . #x1D182) (#x1D185 . #x1D18B) (#x1D1AA . #x1D1AD)
   (#x1D242 . #x1D244) (#xE0001 . #xE01EF)))
1011 | ||
1012 | ;; 2: East Asian Wide and Full-width characters. | |
;; Width 2: every code-point range below is given display width 2 in
;; `char-width-table'.  Each entry is (FROM . TO), inclusive.
(mapc
 (lambda (range)
   (set-char-table-range char-width-table range 2))
 '((#x1100 . #x115F)
   (#x2329 . #x232A)
   (#x2E80 . #x303E)
   (#x3040 . #xA4CF)
   (#xAC00 . #xD7A3)
   (#xF900 . #xFAFF)
   (#xFE30 . #xFE6F)
   (#xFF01 . #xFF60)
   (#xFFE0 . #xFFE6)
   (#x20000 . #x2FFFF)
   (#x30000 . #x3FFFF)))
173f18ce DL |
1026 | |
1027 | ;; Other double width | |
7760ba82 KH |
1028 | ;;(map-charset-chars |
1029 | ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
1030 | ;; 'ethiopic) | |
1031 | ;; (map-charset-chars | |
1032 | ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
1033 | ;; 'tibetan) | |
173f18ce DL |
;; Characters of the 2-column Indian and Arabic charsets get width 2.
(dolist (charset '(indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (chars _unused)
     (set-char-table-range char-width-table chars 2))
   charset))
777cfce6 | 1040 | |
dbff07a2 KH |
(defvar cjk-char-width-table-list
  ;; `use-cjk-char-width-table' stores the table it builds into the
  ;; TABLE slot with `setcar', so hand it a fresh copy rather than the
  ;; quoted literal itself.
  (copy-tree
   '((ja_JP nil (japanese-jisx0208 (#x2121 . #x287E))
            (cp932-2-byte (#x8140 . #x879F)))
     (zh_CN nil (chinese-gb2312 (#x2121 . #x297E)))
     (zh_HK nil (big5-hkscs (#xA140 . #xA3FE) (#xC6A0 . #xC8FE)))
     (zh_TW nil (big5 (#xA140 . #xA3FE))
            (chinese-cns11643-1 (#x2121 . #x427E)))
     (ko_KR nil (korean-ksc5601 (#x2121 . #x2C7E)))))
  "Internal use only.
Alist of locale symbol vs charsets.  In a language environment
corresponding to the locale, width of characters in the charsets is
set to 2.  Each element has the form:
  (LOCALE TABLE (CHARSET (FROM-CODE . TO-CODE) ...) ...)
LOCALE: locale symbol
TABLE: char-table used for `char-width-table', initially nil.
CHARSET: character set
FROM-CODE, TO-CODE: range of code-points in CHARSET")
1059 | ||
(defun use-cjk-char-width-table (locale-name)
  "Internal use only.
Setup `char-width-table' appropriate for a language environment
corresponding to LOCALE-NAME (symbol).

LOCALE-NAME must be a key of `cjk-char-width-table-list'; an error
is signaled otherwise.  The first call for a locale builds a
char-table that gives width 2 to the listed code ranges of that
locale's charsets and inherits everything else from the base
`char-width-table'; the result is stored in the locale's slot and
reused by later calls."
  ;; Step back to the root table, undoing any previously installed
  ;; locale-specific child table.
  (while (char-table-parent char-width-table)
    (setq char-width-table (char-table-parent char-width-table)))
  (let ((slot (assq locale-name cjk-char-width-table-list)))
    (or slot (error "Unknown locale for CJK language environment: %s"
                    locale-name))
    (unless (nth 1 slot)
      (let ((table (make-char-table nil)))
        (dolist (charset-info (nthcdr 2 slot))
          (let ((charset (car charset-info)))
            (dolist (code-range (cdr charset-info))
              (map-charset-chars #'(lambda (range _arg)
                                     (set-char-table-range table range 2))
                                 charset nil
                                 (car code-range) (cdr code-range)))))
        (optimize-char-table table)
        (set-char-table-parent table char-width-table)
        ;; Cache the finished table in the TABLE slot of the entry.
        (setcar (cdr slot) table)))
    (setq char-width-table (nth 1 slot))))
55a3ed16 KH |
1084 | |
(defun use-default-char-width-table ()
  "Internal use only.
Restore `char-width-table' for a non-CJK language environment.
Follows the parent links of the current table until reaching a
table that has no parent, and installs that table."
  (let (parent)
    (while (setq parent (char-table-parent char-width-table))
      (setq char-width-table parent))))
55a3ed16 | 1090 | |
;; Optimize the standard case and syntax tables now that they are filled in.
(mapc #'optimize-char-table
      (list (standard-case-table)
            (standard-syntax-table)))
1093 | ||
55a3ed16 KH |
1094 | \f |
1095 | ;; Setting char-script-table. | |
1096 | ||
b427c97e DL |
1097 | ;; The Unicode blocks actually extend past some of these ranges with |
1098 | ;; undefined codepoints. | |
9ce5de1c KH |
;; Fill `char-script-table' from (FROM TO SCRIPT) entries and store the
;; list of distinct script symbols, in order of first appearance, in
;; extra slot 0 of the table.
(let ((script-list nil))
  (dolist
      (elt
       '((#x0000 #x007F latin)
         (#x00A0 #x024F latin)
         (#x0250 #x02AF phonetic)
         (#x02B0 #x036F latin)
         (#x0370 #x03E1 greek)
         (#x03E2 #x03EF coptic)
         (#x03F0 #x03F3 greek)
         (#x0400 #x04FF cyrillic)
         (#x0530 #x058F armenian)
         (#x0590 #x05FF hebrew)
         (#x0600 #x06FF arabic)
         (#x0700 #x074F syriac)
         (#x07C0 #x07FA nko)
         (#x0780 #x07BF thaana)
         (#x0900 #x097F devanagari)
         (#x0980 #x09FF bengali)
         (#x0A00 #x0A7F gurmukhi)
         (#x0A80 #x0AFF gujarati)
         (#x0B00 #x0B7F oriya)
         (#x0B80 #x0BFF tamil)
         (#x0C00 #x0C7F telugu)
         (#x0C80 #x0CFF kannada)
         (#x0D00 #x0D7F malayalam)
         (#x0D80 #x0DFF sinhala)
         (#x0E00 #x0E5F thai)
         (#x0E80 #x0EDF lao)
         (#x0F00 #x0FFF tibetan)
         (#x1000 #x109F burmese)
         (#x10A0 #x10FF georgian)
         (#x1100 #x11FF hangul)
         (#x1200 #x139F ethiopic)
         (#x13A0 #x13FF cherokee)
         (#x1400 #x167F canadian-aboriginal)
         (#x1680 #x169F ogham)
         (#x16A0 #x16FF runic)
         (#x1780 #x17FF khmer)
         (#x1800 #x18AF mongolian)
         (#x1D00 #x1DFF phonetic)
         (#x1E00 #x1EFF latin)
         (#x1F00 #x1FFF greek)
         (#x2000 #x27FF symbol)
         (#x2800 #x28FF braille)
         (#x2D80 #x2DDF ethiopic)
         (#x2E80 #x2FDF han)
         (#x2FF0 #x2FFF ideographic-description)
         (#x3000 #x303F cjk-misc)
         (#x3040 #x30FF kana)
         (#x3100 #x312F bopomofo)
         (#x3130 #x318F hangul)
         (#x3190 #x319F kanbun)
         (#x31A0 #x31BF bopomofo)
         (#x3400 #x9FAF han)
         (#xA000 #xA4CF yi)
         (#xAA00 #xAA5F cham)
         (#xAA60 #xAA7B burmese)
         (#xAA80 #xAADF tai-viet)
         (#xAC00 #xD7AF hangul)
         (#xF900 #xFAFF han)
         (#xFB1D #xFB4F hebrew)
         (#xFB50 #xFDFF arabic)
         (#xFE70 #xFEFC arabic)
         (#xFF00 #xFF5F cjk-misc)
         (#xFF61 #xFF9F kana)
         (#xFFE0 #xFFE6 cjk-misc)
         (#x10000 #x100FF linear-b)
         (#x10100 #x1013F aegean-number)
         (#x10140 #x1018A ancient-greek-number)
         (#x10190 #x1019B ancient-symbol)
         (#x101D0 #x101FF phaistos-disc)
         (#x10280 #x1029F lycian)
         (#x102A0 #x102DF carian)
         ;; NOTE(review): this symbol was previously misspelled
         ;; `olt-italic'; check other files (e.g. fontset setup) for
         ;; references to the old spelling.
         (#x10300 #x1032F old-italic)
         (#x10380 #x1039F ugaritic)
         (#x103A0 #x103DF old-persian)
         (#x10400 #x1044F deseret)
         (#x10450 #x1047F shavian)
         (#x10480 #x104AF osmanya)
         (#x10800 #x1083F cypriot-syllabary)
         (#x10900 #x1091F phoenician)
         (#x10920 #x1093F lydian)
         (#x10A00 #x10A5F kharoshthi)
         (#x12000 #x123FF cuneiform)
         (#x12400 #x1247F cuneiform-numbers-and-punctuation)
         (#x1D000 #x1D0FF byzantine-musical-symbol)
         (#x1D100 #x1D1FF musical-symbol)
         (#x1D200 #x1D24F ancient-greek-musical-notation)
         (#x1D300 #x1D35F tai-xuan-jing-symbol)
         (#x1D360 #x1D37F counting-rod-numeral)
         (#x1D400 #x1D7FF mathematical)
         (#x1F000 #x1F02F mahjong-tile)
         (#x1F030 #x1F09F domino-tile)
         (#x20000 #x2AFFF han)
         (#x2F800 #x2FFFF han)))
    (let ((script (nth 2 elt)))
      (set-char-table-range char-script-table
                            (cons (car elt) (nth 1 elt)) script)
      ;; Collect each script symbol once.
      (or (memq script script-list)
          (setq script-list (cons script script-list)))))
  (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
1200 | ||
;; Every character of the `tibetan' charset gets script `tibetan'.
(map-charset-chars
 (lambda (chars _unused)
   (set-char-table-range char-script-table chars 'tibetan))
 'tibetan)
1205 | ||
e7259832 | 1206 | \f |
59db3a5c KH |
1207 | ;;; Setting unicode-category-table. |
1208 | ||
c805dec0 KH |
(setq unicode-category-table
      (unicode-property-table-internal 'general-category))

;; Put every character whose general category is neither a mark (M*)
;; nor an "other" (C*) category into category `.'.
(map-char-table
 (lambda (chars category)
   (when category
     (let ((major-class (aref (symbol-name category) 0)))
       (unless (memq major-class '(?M ?C))
         (modify-category-entry chars ?.)))))
 unicode-category-table)

(optimize-char-table (standard-category-table))
59db3a5c KH |
1220 | |
1221 | \f | |
b2cca856 KH |
1222 | ;; Display of glyphless characters. |
1223 | ||
(defvar char-acronym-table (make-char-table 'char-acronym-table)
  "Char-table holding acronym strings for non-graphic characters.
An entry is nil for a character that has no acronym.")
1227 | ||
;; Acronyms for the C0 controls U+0000..U+001F, in code order.  The two
;; nil entries are TAB (U+0009) and newline (U+000A), which get no
;; acronym.  U+001C is FILE SEPARATOR, i.e. "FS".
(let ((code 0))
  (dolist (acronym '("NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" "BEL"
                     "BS"   nil   nil  "VT"  "FF"  "CR"  "SO"  "SI"
                     "DLE" "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB"
                     "CAN" "EM"  "SUB" "ESC" "FS"  "GS"  "RS"  "US"))
    (aset char-acronym-table code acronym)
    (setq code (1+ code))))
1235 | ||
;; Acronyms for the C1 controls U+0080..U+009F, in code order.  "XXX"
;; marks positions for which no acronym is given here.  U+008D is
;; REVERSE LINE FEED ("RI"), U+008F is SINGLE SHIFT THREE ("SS3") and
;; U+009A is SINGLE CHARACTER INTRODUCER ("SCI").
(let ((code #x0080))
  (dolist (acronym '("XXX" "XXX" "BPH" "NBH" "IND" "NEL" "SSA" "ESA"
                     "HTS" "HTJ" "VTS" "PLD" "PLU" "RI"  "SS2" "SS3"
                     "DCS" "PU1" "PU2" "STS" "CCH" "MW"  "SPA" "EPA"
                     "SOS" "XXX" "SCI" "CSI" "ST"  "OSC" "PM"  "APC"))
    (aset char-acronym-table code acronym)
    (setq code (1+ code))))
1243 | ||
;; Acronyms for individual format and other non-graphic characters.
(aset char-acronym-table #x17B4 "KIVAQ")   ; KHMER VOWEL INHERENT AQ
(aset char-acronym-table #x17B5 "KIVAA")   ; KHMER VOWEL INHERENT AA
(aset char-acronym-table #x200B "ZWSP")    ; ZERO WIDTH SPACE
(aset char-acronym-table #x200C "ZWNJ")    ; ZERO WIDTH NON-JOINER
(aset char-acronym-table #x200D "ZWJ")     ; ZERO WIDTH JOINER
(aset char-acronym-table #x200E "LRM")     ; LEFT-TO-RIGHT MARK
(aset char-acronym-table #x200F "RLM")     ; RIGHT-TO-LEFT MARK
(aset char-acronym-table #x202A "LRE")     ; LEFT-TO-RIGHT EMBEDDING
(aset char-acronym-table #x202B "RLE")     ; RIGHT-TO-LEFT EMBEDDING
(aset char-acronym-table #x202C "PDF")     ; POP DIRECTIONAL FORMATTING
(aset char-acronym-table #x202D "LRO")     ; LEFT-TO-RIGHT OVERRIDE
(aset char-acronym-table #x202E "RLO")     ; RIGHT-TO-LEFT OVERRIDE
(aset char-acronym-table #x2060 "WJ")      ; WORD JOINER
(aset char-acronym-table #x206A "ISS")     ; INHIBIT SYMMETRIC SWAPPING
(aset char-acronym-table #x206B "ASS")     ; ACTIVATE SYMMETRIC SWAPPING
(aset char-acronym-table #x206C "IAFS")    ; INHIBIT ARABIC FORM SHAPING
(aset char-acronym-table #x206D "AAFS")    ; ACTIVATE ARABIC FORM SHAPING
(aset char-acronym-table #x206E "NADS")    ; NATIONAL DIGIT SHAPES
(aset char-acronym-table #x206F "NODS")    ; NOMINAL DIGIT SHAPES
(aset char-acronym-table #xFEFF "ZWNBSP")  ; ZERO WIDTH NO-BREAK SPACE
(aset char-acronym-table #xFFF9 "IAA")     ; INTERLINEAR ANNOTATION ANCHOR
(aset char-acronym-table #xFFFA "IAS")     ; INTERLINEAR ANNOTATION SEPARATOR
(aset char-acronym-table #xFFFB "IAT")     ; INTERLINEAR ANNOTATION TERMINATOR
(aset char-acronym-table #x1D173 "BEGBM")  ; MUSICAL SYMBOL BEGIN BEAM
(aset char-acronym-table #x1D174 "ENDBM")  ; MUSICAL SYMBOL END BEAM
(aset char-acronym-table #x1D175 "BEGTIE") ; MUSICAL SYMBOL BEGIN TIE
;; Named like its BEGIN counterpart and the other END symbols.
(aset char-acronym-table #x1D176 "ENDTIE") ; MUSICAL SYMBOL END TIE
(aset char-acronym-table #x1D177 "BEGSLR") ; MUSICAL SYMBOL BEGIN SLUR
(aset char-acronym-table #x1D178 "ENDSLR") ; MUSICAL SYMBOL END SLUR
(aset char-acronym-table #x1D179 "BEGPHR") ; MUSICAL SYMBOL BEGIN PHRASE
(aset char-acronym-table #x1D17A "ENDPHR") ; MUSICAL SYMBOL END PHRASE
(aset char-acronym-table #xE0001 "|->TAG") ; LANGUAGE TAG
(aset char-acronym-table #xE0020 "SP TAG") ; TAG SPACE
;; U+E0021..U+E007E: each maps to " X TAG", where X is the character
;; with code 33..126 at the same offset.
(dotimes (i 94)
  (aset char-acronym-table (+ #xE0021 i) (format " %c TAG" (+ 33 i))))
(aset char-acronym-table #xE007F "->|TAG") ; CANCEL TAG
1280 | ||
(defun update-glyphless-char-display (&optional variable value)
  "Make the setting of `glyphless-char-display-control' take effect.
This function updates the char-table `glyphless-char-display'.

It has the calling convention of a `:set' function: if VARIABLE
is non-nil, its default value is set to VALUE (including when
VALUE is nil).  VALUE is a list of (GROUP . METHOD) directives as
described in `glyphless-char-display-control'; they are applied
in order.  When called with no VALUE, no directive is applied.

Signal an error if a METHOD or GROUP is not one of the known symbols."
  ;; Test VARIABLE, not VALUE: a nil VALUE must still be stored, and
  ;; there is nothing to store into when VARIABLE is nil.
  (when variable
    (set-default variable value))
  (dolist (elt value)
    (let ((target (car elt))
          (method (cdr elt)))
      (or (memq method '(zero-width thin-space empty-box acronym hex-code))
          (error "Invalid glyphless character display method: %s" method))
      (cond ((eq target 'c0-control)
             (set-char-table-range glyphless-char-display '(#x00 . #x1F)
                                   method)
             ;; Users will not expect their newlines and TABs be
             ;; displayed as anything but themselves, so exempt those
             ;; two characters from c0-control.
             (set-char-table-range glyphless-char-display #x9 nil)
             (set-char-table-range glyphless-char-display #xa nil))
            ((eq target 'c1-control)
             (set-char-table-range glyphless-char-display '(#x80 . #x9F)
                                   method))
            ((eq target 'format-control)
             (map-char-table
              #'(lambda (char category)
                  (if (eq category 'Cf)
                      (let ((this-method method)
                            from to)
                        ;; CHAR is either one character or a
                        ;; (FROM . TO) range.
                        (if (consp char)
                            (setq from (car char) to (cdr char))
                          (setq from char to char))
                        (while (<= from to)
                          ;; U+00AD is skipped: it is left to display
                          ;; as itself.
                          (when (/= from #xAD)
                            ;; For `acronym', the per-character entry
                            ;; of `char-acronym-table' is what gets
                            ;; stored (nil if there is none).
                            (if (eq method 'acronym)
                                (setq this-method
                                      (aref char-acronym-table from)))
                            (set-char-table-range glyphless-char-display
                                                  from this-method))
                          (setq from (1+ from))))))
              unicode-category-table))
            ((eq target 'no-font)
             (set-char-table-extra-slot glyphless-char-display 0 method))
            (t
             (error "Invalid glyphless character group: %s" target))))))
b2cca856 | 1324 | |
0e7c0582 EZ |
1325 | ;;; Control of displaying glyphless characters. |
(defcustom glyphless-char-display-control
  '((format-control . thin-space)
    (no-font . hex-code))
  "List of directives to control display of glyphless characters.

Each element has the form (GROUP . METHOD), where GROUP is a
symbol specifying the character group, and METHOD is a symbol
specifying the method of displaying characters belonging to that
group.

GROUP must be one of these symbols:
  `c0-control':     U+0000..U+001F, but excluding newline and TAB.
  `c1-control':     U+0080..U+009F.
  `format-control': Characters of Unicode General Category `Cf',
                    such as U+200C (ZWNJ), U+200E (LRM), but
                    excluding characters that have graphic images,
                    such as U+00AD (SHY).
  `no-font':        characters for which no suitable font is found.
                    For character terminals, characters that cannot
                    be encoded by `terminal-coding-system'.

METHOD must be one of these symbols:
  `zero-width': don't display.
  `thin-space': display a thin (1-pixel width) space.  On character
                terminals, display as 1-character space.
  `empty-box':  display an empty box.
  `acronym':    display an acronym of the character in a box.  The
                acronym is taken from `char-acronym-table', which see.
  `hex-code':   display the hexadecimal character code in a box."
  :group 'display
  :type '(alist :key-type (symbol :tag "Character Group")
                :value-type (symbol :tag "Display Method"))
  ;; Every group offers the same choice of display methods.
  :options (mapcar
            (lambda (group)
              (list group
                    '(choice (const :tag "Don't display" zero-width)
                             (const :tag "Display as thin space" thin-space)
                             (const :tag "Display as empty box" empty-box)
                             (const :tag "Display acronym" acronym)
                             (const :tag "Display hex code in a box" hex-code))))
            '(c0-control c1-control format-control no-font))
  :set #'update-glyphless-char-display)
1384 | ||
b2cca856 | 1385 | \f |
e7259832 KH |
1386 | ;;; Setting word boundary. |
1387 | ||
;; Each entry is a pair of category mnemonics (or nil).
(setq word-combining-categories
      '((nil . ?^) (?^ . nil) (?C . ?H) (?C . ?K))
      word-separating-categories        ; (2-byte character sets)
      '((?H . ?K)))                     ; Hiragana - Katakana
1397 | ||
1cbfaab9 | 1398 | ;; Local Variables: |
985773c9 | 1399 | ;; coding: utf-8 |
1cbfaab9 | 1400 | ;; End: |
777cfce6 | 1401 | |
60370d40 | 1402 | ;;; characters.el ends here |