Merge from emacs-23
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
5df4f04c 3;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
2fd125a3 4;; Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 6;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
2fd125a3
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
8f924df7 9;; Copyright (C) 2003
55bd52ea
KH
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
4ed46869
KH
12
13;; Keywords: multibyte character, character set, syntax, category
14
15;; This file is part of GNU Emacs.
16
4936186e 17;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 18;; it under the terms of the GNU General Public License as published by
4936186e
GM
19;; the Free Software Foundation, either version 3 of the License, or
20;; (at your option) any later version.
4ed46869
KH
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
4936186e 28;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869
KH
29
30;;; Commentary:
31
60370d40
PJ
32;;; Code:
33
4ed46869
KH
34;;; Predefined categories.
35
36;; For each character set.
37
46bf60bc
KH
38(define-category ?a "ASCII
39ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])")
4ed46869
KH
40(define-category ?l "Latin")
41(define-category ?t "Thai")
42(define-category ?g "Greek")
43(define-category ?b "Arabic")
44(define-category ?w "Hebrew")
45(define-category ?y "Cyrillic")
46bf60bc
KH
46(define-category ?k "Katakana
47Japanese katakana")
48(define-category ?r "Roman
49Japanese roman")
4ed46869
KH
50(define-category ?c "Chinese")
51(define-category ?j "Japanese")
52(define-category ?h "Korean")
46bf60bc
KH
53(define-category ?e "Ethiopic
54Ethiopic (Ge'ez)")
55(define-category ?v "Viet
56Vietnamese")
4ed46869 57(define-category ?i "Indian")
6eba8645 58(define-category ?o "Lao")
9395eb7c 59(define-category ?q "Tibetan")
4ed46869
KH
60
61;; For each group (row) of 2-byte character sets.
62
46bf60bc
KH
63(define-category ?A "2-byte alnum
64Alpha-numeric characters of 2-byte character sets")
65(define-category ?C "2-byte han
66Chinese (Han) characters of 2-byte character sets")
67(define-category ?G "2-byte Greek
68Greek characters of 2-byte character sets")
69(define-category ?H "2-byte Hiragana
70Japanese Hiragana characters of 2-byte character sets")
71(define-category ?K "2-byte Katakana
72Japanese Katakana characters of 2-byte character sets")
73(define-category ?N "2-byte Korean
74Korean Hangul characters of 2-byte character sets")
91c491e0 75(define-category ?Y "2-byte Cyrillic
46bf60bc 76Cyrillic characters of 2-byte character sets")
4ed46869
KH
77(define-category ?I "Indian Glyphs")
78
79;; For phonetic classifications.
80
81(define-category ?0 "consonant")
46bf60bc 82(define-category ?1 "base vowel
4eb97232 83Base (independent) vowel")
46bf60bc 84(define-category ?2 "upper diacritic
4eb97232 85Upper diacritical mark (including upper vowel)")
46bf60bc 86(define-category ?3 "lower diacritic
4eb97232 87Lower diacritical mark (including lower vowel)")
46bf60bc 88(define-category ?4 "combining tone
4eb97232 89Combining tone mark")
9765a2ba 90(define-category ?5 "symbol")
4ed46869 91(define-category ?6 "digit")
91c491e0 92(define-category ?7 "vowel diacritic
4eb97232 93Vowel-modifying diacritical mark")
6eba8645
KH
94(define-category ?8 "vowel-signs")
95(define-category ?9 "semivowel lower")
4ed46869
KH
96
97;; For filling.
46bf60bc
KH
98(define-category ?| "line breakable
99While filling, we can break a line at this character.")
4ed46869 100
504af7b2 101;; For indentation calculation.
70ea295a 102(define-category ?\s
46bf60bc
KH
103 "space for indent
104This character counts as a space for indentation purposes.")
504af7b2 105
94487c4e 106;; Keep the following for `kinsoku' processing. See comments in
4ed46869 107;; kinsoku.el.
46bf60bc
KH
108(define-category ?> "Not at bol
109A character which can't be placed at beginning of line.")
110(define-category ?< "Not at eol
111A character which can't be placed at end of line.")
4ed46869 112
8ea6fa80
KH
113;; Base and Combining
114(define-category ?. "Base
115Base characters (Unicode General Category L,N,P,S,Zs)")
46bf60bc 116(define-category ?^ "Combining
4eb97232 117Combining diacritic or mark (Unicode General Category M)")
4ed46869
KH
118\f
119;;; Setting syntax and category.
120
121;; ASCII
122
e2cc40b7
KH
123;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
124(modify-category-entry '(32 . 127) ?a)
125(modify-category-entry '(32 . 127) ?l)
4ed46869 126
c94ae9eb
DL
127;; Deal with the CJK charsets first. Since the syntax of blocks is
128;; defined per charset, and the charsets may contain e.g. Latin
129;; characters, we end up with the wrong syntax definitions if we're
130;; not careful.
4ed46869 131
66bff5ed 132;; Chinese characters (Unicode)
a5bb49e1
KH
133(modify-category-entry '(#x2E80 . #x312F) ?|)
134(modify-category-entry '(#x3190 . #x33FF) ?|)
66a85e76
KH
135(modify-category-entry '(#x3400 . #x4DBF) ?C)
136(modify-category-entry '(#x4E00 . #x9FAF) ?C)
66bff5ed
KH
137(modify-category-entry '(#x3400 . #x9FAF) ?c)
138(modify-category-entry '(#x3400 . #x9FAF) ?|)
139(modify-category-entry '(#xF900 . #xFAFF) ?C)
140(modify-category-entry '(#xF900 . #xFAFF) ?c)
141(modify-category-entry '(#xF900 . #xFAFF) ?|)
796f8b2f
KH
142(modify-category-entry '(#x20000 . #x2FFFF) ?|)
143(modify-category-entry '(#x20000 . #x2FFFF) ?C)
144(modify-category-entry '(#x20000 . #x2FFFF) ?c)
8e4cd685 145
4ed46869
KH
146
147;; Chinese character set (GB2312)
148
66bff5ed
KH
149(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
150(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
151(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)
4ed46869 152
87a39edb 153(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
9ad4b491
KH
154(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
155(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
156(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
66bff5ed
KH
157(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
158(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
159(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
160(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
161(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
4ed46869
KH
162
163;; Chinese character set (BIG5)
164
e7259832 165(map-charset-chars #'modify-category-entry 'big5 ?c)
66a85e76 166(map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA261)
9ad4b491 167(map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E)
66a85e76 168(map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DC)
4ed46869
KH
169
170;; Chinese character set (CNS11643)
171
87a39edb
DL
172(dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
173 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
174 chinese-cns11643-7))
175 (map-charset-chars #'modify-category-entry c ?c)
9ad4b491
KH
176 (if (eq c 'chinese-cns11643-1)
177 (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E)
8e4cd685 178 (map-charset-chars #'modify-category-entry c ?C)))
4ed46869 179
8f924df7 180;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)
4ed46869 181
66bff5ed 182(map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
4ed46869 183
66bff5ed 184(map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
4ed46869 185
8f924df7 186(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
761f6427
KH
187 japanese-jisx0213-1 japanese-jisx0213-2
188 cp932-2-byte))
8e4cd685 189 (map-charset-chars #'modify-category-entry l ?j))
4ed46869 190
c4186f9c
KH
191;; Fullwidth characters
192(modify-category-entry '(#xff01 . #xff60) ?\|)
193
269a5dd0 194;; Unicode equivalents of JISX0201-kana
66bff5ed
KH
195(let ((range '(#xff61 . #xff9f)))
196 (modify-category-entry range ?k)
197 (modify-category-entry range ?j)
198 (modify-category-entry range ?\|))
269a5dd0
DL
199
200;; Katakana block
796f8b2f
KH
201(modify-category-entry '(#x3099 . #x309C) ?K)
202(modify-category-entry '(#x30A0 . #x30FF) ?K)
6f3ac1e1 203(modify-category-entry '(#x31F0 . #x31FF) ?K)
b11c2874 204(modify-category-entry '(#x30A0 . #x30FA) ?\|)
796f8b2f 205(modify-category-entry #x30FF ?\|)
269a5dd0
DL
206
207;; Hiragana block
796f8b2f
KH
208(modify-category-entry '(#x3040 . #x309F) ?H)
209(modify-category-entry '(#x3040 . #x3096) ?\|)
210(modify-category-entry #x309F ?\|)
211(modify-category-entry #x30A0 ?H)
212(modify-category-entry #x30FC ?H)
213
269a5dd0 214
4ed46869 215;; JISX0208
66bff5ed
KH
216(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
217(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
218(let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))) ; characters to be given word syntax
69c2c6ea 219 (dolist (elt chars)
abdaa411 220 (modify-syntax-entry elt "w"))) ; use the loop variable: (car chars) only ever touched the first character
66bff5ed
KH
221
222(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E)
223(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E)
224(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E)
225(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E)
226(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E)
227(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E)
228(modify-category-entry ?ー ?K)
229(let ((chars '(?゛ ?゜)))
4ed46869
KH
230 (while chars
231 (modify-category-entry (car chars) ?K)
232 (modify-category-entry (car chars) ?H)
233 (setq chars (cdr chars))))
66a85e76 234(let ((chars '(?仝 ?々 ?〆 ?〇)))
4ed46869
KH
235 (while chars
236 (modify-category-entry (car chars) ?C)
237 (setq chars (cdr chars))))
238
239;; JISX0212
4ed46869 240
66bff5ed 241(map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)
4ed46869
KH
242
243;; JISX0201-Kana
87a39edb 244
abdaa411 245(let ((chars '(?。 ?、 ?・)))
4ed46869
KH
246 (while chars
247 (modify-syntax-entry (car chars) ".")
248 (setq chars (cdr chars))))
249
e6d10035
KH
250(modify-syntax-entry ?\「 "(」") ; open-paren class, matched by the closing corner bracket
251(modify-syntax-entry ?\」 ")「") ; close-paren class (was "(", i.e. a second opener), matched by the opening corner bracket
226e4119 252
4ed46869
KH
253;; Korean character set (KSC5601)
254
87a39edb 255(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)
66bff5ed
KH
256
257(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E)
439f7264
DL
258(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E)
259(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E)
260(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E)
9ad4b491
KH
261(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339)
262(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A)
263(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A)
66bff5ed
KH
264(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E)
265(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E)
266(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E)
267(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
4ed46869 268
c94ae9eb 269;; These are in more than one charset.
8f924df7
KH
270(let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
271 "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
272 "()[]{}"))
273 open close)
274 (dotimes (i (/ (length parens) 2))
275 (setq open (aref parens (* i 2))
276 close (aref parens (1+ (* i 2))))
277 (modify-syntax-entry open (format "(%c" close))
278 (modify-syntax-entry close (format ")%c" open))))
d05cfa1f 279
c94ae9eb 280;; Arabic character set
6eba8645 281
c94ae9eb
DL
282(let ((charsets '(arabic-iso8859-6
283 arabic-digit
284 arabic-1-column
285 arabic-2-column)))
286 (while charsets
287 (map-charset-chars #'modify-category-entry (car charsets) ?b)
288 (setq charsets (cdr charsets))))
289(modify-category-entry '(#x600 . #x6ff) ?b)
290(modify-category-entry '(#xfb50 . #xfdff) ?b)
291(modify-category-entry '(#xfe70 . #xfefe) ?b)
6eba8645 292
c94ae9eb
DL
293;; Cyrillic character set (ISO-8859-5)
294
295(modify-syntax-entry ?№ ".")
296
297;; Ethiopic character set
298
4c81b0f6
KH
299(modify-category-entry '(#x1200 . #x1399) ?e)
300(modify-category-entry '(#x2d80 . #x2dde) ?e)
55a3ed16 301(let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨)))
c94ae9eb
DL
302 (while chars
303 (modify-syntax-entry (car chars) ".")
304 (setq chars (cdr chars))))
305(map-charset-chars #'modify-category-entry 'ethiopic ?e)
306
307;; Hebrew character set (ISO-8859-8)
308
309(modify-syntax-entry #x5be ".") ; MAQAF
310(modify-syntax-entry #x5c0 ".") ; PASEQ
311(modify-syntax-entry #x5c3 ".") ; SOF PASUQ
312(modify-syntax-entry #x5f3 ".") ; GERESH
313(modify-syntax-entry #x5f4 ".") ; GERSHAYIM
314
315;; Indian character set (IS 13194 and other Emacs original Indian charsets)
316
317(modify-category-entry '(#x901 . #x970) ?i)
318(map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
319(map-charset-chars #'modify-category-entry 'indian-2-column ?i)
d05cfa1f 320
6eba8645
KH
321;; Lao character set
322
abdaa411
DL
323(modify-category-entry '(#xe80 . #xeff) ?o)
324(map-charset-chars #'modify-category-entry 'lao ?o)
6eba8645 325
abdaa411 326(let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant
e6d10035
KH
327 ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base
328 ("ັິ-ືົໍ" "w" ?2) ; vowel upper
329 ("ຸູ" "w" ?3) ; vowel lower
8f924df7 330 ("່-໋" "w" ?4) ; tone mark
e6d10035
KH
331 ("ຼຽ" "w" ?9) ; semivowel lower
332 ("໐-໙" "w" ?6) ; digit
333 ("ຯໆ" "_" ?5) ; symbol
6eba8645
KH
334 ))
335 elm chars len syntax category to ch i)
336 (while deflist
337 (setq elm (car deflist))
338 (setq chars (car elm)
339 len (length chars)
340 syntax (nth 1 elm)
341 category (nth 2 elm)
342 i 0)
343 (while (< i len)
344 (if (= (aref chars i) ?-)
345 (setq i (1+ i)
4a027a0d
KH
346 to (aref chars i))
347 (setq ch (aref chars i)
6eba8645
KH
348 to ch))
349 (while (<= ch to)
269a5dd0
DL
350 (unless (string-equal syntax "w")
351 (modify-syntax-entry ch syntax))
6eba8645
KH
352 (modify-category-entry ch category)
353 (setq ch (1+ ch)))
4a027a0d 354 (setq i (1+ i)))
6eba8645
KH
355 (setq deflist (cdr deflist))))
356
4ed46869
KH
357;; Thai character set (TIS620)
358
abdaa411
DL
359(modify-category-entry '(#xe00 . #xe7f) ?t)
360(map-charset-chars #'modify-category-entry 'thai-tis620 ?t)
4ed46869
KH
361
362(let ((deflist '(;; chars syntax category
e6d10035
KH
363 ("ก-รลว-ฮ" "w" ?0) ; consonant
364 ("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base
365 ("ัิ-ื็๎" "w" ?2) ; vowel upper
366 ("ุ-ฺ" "w" ?3) ; vowel lower
8f924df7 367 ("่-ํ" "w" ?4) ; tone mark
e6d10035
KH
368 ("๐-๙" "w" ?6) ; digit
369 ("ฯๆ฿๏๚๛" "_" ?5) ; symbol
4ed46869
KH
370 ))
371 elm chars len syntax category to ch i)
9395eb7c
KH
372 (while deflist
373 (setq elm (car deflist))
374 (setq chars (car elm)
375 len (length chars)
376 syntax (nth 1 elm)
377 category (nth 2 elm)
378 i 0)
379 (while (< i len)
380 (if (= (aref chars i) ?-)
381 (setq i (1+ i)
4a027a0d
KH
382 to (aref chars i))
383 (setq ch (aref chars i)
9395eb7c
KH
384 to ch))
385 (while (<= ch to)
269a5dd0
DL
386 (unless (string-equal syntax "w")
387 (modify-syntax-entry ch syntax))
9395eb7c
KH
388 (modify-category-entry ch category)
389 (setq ch (1+ ch)))
4a027a0d 390 (setq i (1+ i)))
9395eb7c
KH
391 (setq deflist (cdr deflist))))
392
393;; Tibetan character set
394
abdaa411
DL
395(modify-category-entry '(#xf00 . #xfff) ?q)
396(map-charset-chars #'modify-category-entry 'tibetan ?q)
397(map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
9395eb7c
KH
398
399(let ((deflist '(;; chars syntax category
725d7c92 400 ("ཀ-ཀྵཪ" "w" ?0) ; consonant
55a3ed16 401 ("ྐ-ྐྵྺྻྼ" "w" ?0) ;
725d7c92
DL
402 ("ིེཻོཽྀ" "w" ?2) ; upper vowel
403 ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier
55a3ed16 404 ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier
8f924df7 405 ("཰" "w" ?3) ; invisible vowel a
725d7c92
DL
406 ("༠-༩༪-༳" "w" ?6) ; digit
407 ("་།-༒༔ཿ" "." ?|) ; line-break char
408 ("་།༏༐༑༔ཿ" "." ?|) ;
409 ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition
410 ("་།༏༐༑༔ཿ" "." ?>) ;
411 ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition
412 ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
9395eb7c
KH
413 ))
414 elm chars len syntax category to ch i)
4ed46869
KH
415 (while deflist
416 (setq elm (car deflist))
417 (setq chars (car elm)
418 len (length chars)
419 syntax (nth 1 elm)
420 category (nth 2 elm)
421 i 0)
422 (while (< i len)
423 (if (= (aref chars i) ?-)
424 (setq i (1+ i)
4a027a0d
KH
425 to (aref chars i))
426 (setq ch (aref chars i)
4ed46869
KH
427 to ch))
428 (while (<= ch to)
269a5dd0
DL
429 (unless (string-equal syntax "w")
430 (modify-syntax-entry ch syntax))
4ed46869
KH
431 (modify-category-entry ch category)
432 (setq ch (1+ ch)))
4a027a0d 433 (setq i (1+ i)))
4ed46869
KH
434 (setq deflist (cdr deflist))))
435
436;; Vietnamese character set
437
abdaa411
DL
438;; To make a word with Latin characters
439(map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l)
440(map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v)
441
442(map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l)
443(map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v)
4ed46869 444
e5dd1155
KH
445(let ((tbl (standard-case-table))
446 (i 32))
447 (while (< i 128)
725d7c92
DL
448 (let* ((char (decode-char 'vietnamese-viscii-upper i))
449 (charl (decode-char 'vietnamese-viscii-lower i))
450 (uc (encode-char char 'ucs))
451 (lc (encode-char charl 'ucs)))
452 (set-case-syntax-pair char (decode-char 'vietnamese-viscii-lower i)
4eb97232 453 tbl)
725d7c92
DL
454 (if uc (modify-category-entry uc ?v))
455 (if lc (modify-category-entry lc ?v)))
e5dd1155
KH
456 (setq i (1+ i))))
457
d807d0c7
KH
458;; Tai Viet
459(let ((deflist '(;; chars syntax category
460 ((?ꪀ. ?ꪯ) "w" ?0) ; consonant
461 ("ꪱꪵꪶ" "w" ?1) ; vowel base
462 ((?ꪹ . ?ꪽ) "w" ?1) ; vowel base
463 ("ꪰꪲꪳꪷꪸꪾ" "w" ?2) ; vowel upper
464 ("ꪴ" "w" ?3) ; vowel lower
465 ("ꫀꫂ" "w" ?1) ; non-combining tone-mark
466 ("꪿꫁" "w" ?4) ; combining tone-mark
467 ((?ꫛ . ?꫟) "_" ?5) ; symbol
468 )))
469 (dolist (elm deflist) ; each ELM is (CHARS SYNTAX CATEGORY)
470 (let ((chars (car elm))
471 (syntax (nth 1 elm))
472 (category (nth 2 elm)))
473 (if (consp chars) ; a (FROM . TO) cons is passed on as-is, as one range
474 (progn
475 (modify-syntax-entry chars syntax)
476 (modify-category-entry chars category))
477 (mapc #'(lambda (x) ; otherwise CHARS is a string: handle each character separately
478 (modify-syntax-entry x syntax)
479 (modify-category-entry x category))
480 chars)))))
c94ae9eb
DL
481
482;; Latin
483
484(modify-category-entry '(#x80 . #x024F) ?l)
d05cfa1f 485
85ef8ece
KH
486(let ((tbl (standard-case-table)) c)
487
4fb82d62
DL
488 ;; Latin-1
489
490 ;; Fixme: Some of the non-word syntaxes here perhaps should be
491 ;; reviewed. (Note that the following all implicitly have word
492 ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.) There should be a well-defined way of
493 ;; relating Unicode categories to Emacs syntax codes.
db92e81e
KH
494
495 ;; NBSP isn't semantically interchangeable with other whitespace chars,
496 ;; so it's more like punctuation.
497 (set-case-syntax ?  "." tbl)
4fb82d62
DL
498 (set-case-syntax ?¡ "." tbl)
499 (set-case-syntax ?¦ "_" tbl)
500 (set-case-syntax ?§ "." tbl)
501 (set-case-syntax ?© "_" tbl)
502 (set-case-syntax-delims 171 187 tbl) ; « »
503 (set-case-syntax ?¬ "_" tbl)
504 (set-case-syntax ?­ "_" tbl)
505 (set-case-syntax ?® "_" tbl)
506 (set-case-syntax ?° "_" tbl)
507 (set-case-syntax ?± "_" tbl)
508 (set-case-syntax ?µ "_" tbl)
509 (set-case-syntax ?· "_" tbl)
510 (set-case-syntax ?¼ "_" tbl)
511 (set-case-syntax ?½ "_" tbl)
512 (set-case-syntax ?¾ "_" tbl)
513 (set-case-syntax ?¿ "." tbl)
514 (let ((c 192))
515 (while (<= c 222)
516 (set-case-syntax-pair c (+ c 32) tbl)
517 (setq c (1+ c))))
518 (set-case-syntax ?× "_" tbl)
519 (set-case-syntax ?ß "w" tbl)
520 (set-case-syntax ?÷ "_" tbl)
521 ;; See below for ÿ.
85ef8ece 522
85ef8ece
KH
523 ;; Latin Extended-A, Latin Extended-B
524 (setq c #x0100)
e5e381c8
KH
525 (while (<= c #x02B8)
526 (modify-category-entry c ?l)
d05cfa1f 527 (setq c (1+ c)))
2bb915b8 528
e5e381c8
KH
529 (let ((pair-ranges '((#x0100 . #x012F)
530 (#x0132 . #x0137)
531 (#x0139 . #x0148)
532 (#x014a . #x0177)
533 (#x0179 . #x017E)
534 (#x0182 . #x0185)
796f8b2f
KH
535 (#x0187 . #x0188)
536 (#x018B . #x018C)
e5e381c8
KH
537 (#x0191 . #x0192)
538 (#x0198 . #x0199)
539 (#x01A0 . #x01A5)
540 (#x01A7 . #x01A8)
541 (#x01AC . #x01AD)
542 (#x01AF . #x01B0)
543 (#x01B3 . #x01B6)
544 (#x01BC . #x01BD)
545 (#x01CD . #x01DC)
546 (#x01DE . #x01EF)
547 (#x01F4 . #x01F5)
548 (#x01F8 . #x021F)
549 (#x0222 . #x0233)
550 (#x023B . #x023C)
551 (#x0241 . #x0242)
552 (#x0246 . #x024F))))
553 (dolist (elt pair-ranges)
554 (let ((from (car elt)) (to (cdr elt)))
555 (while (< from to)
556 (set-case-syntax-pair from (1+ from) tbl)
557 (setq from (+ from 2))))))
2bb915b8 558
796f8b2f
KH
559 (set-case-syntax-pair #x189 #x256 tbl)
560 (set-case-syntax-pair #x18A #x257 tbl)
561
2bb915b8
KH
562 ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
563 ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
564 ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
565 ;; SMALL LETTER I.
566
567 ;; We used to set up half of those correspondences unconditionally,
568 ;; but that makes searches slow. So now we don't set up either half
569 ;; of these correspondences by default.
570
571 ;; (set-downcase-syntax ?İ ?i tbl)
572 ;; (set-upcase-syntax ?I ?ı tbl)
573
e6d10035
KH
574 (set-case-syntax-pair ?DŽ ?dž tbl)
575 (set-case-syntax-pair ?Dž ?dž tbl)
576 (set-case-syntax-pair ?LJ ?lj tbl)
577 (set-case-syntax-pair ?Lj ?lj tbl)
578 (set-case-syntax-pair ?NJ ?nj tbl)
579 (set-case-syntax-pair ?Nj ?nj tbl)
e5e381c8 580
269a5dd0 581 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
e6d10035
KH
582 (set-case-syntax-pair ?DZ ?dz tbl)
583 (set-case-syntax-pair ?Dz ?dz tbl)
e6d10035
KH
584 (set-case-syntax-pair ?Ƕ ?ƕ tbl)
585 (set-case-syntax-pair ?Ƿ ?ƿ tbl)
269a5dd0 586
85ef8ece 587 ;; Latin Extended Additional
abdaa411 588 (modify-category-entry '(#x1e00 . #x1ef9) ?l)
85ef8ece 589 (setq c #x1e00)
d05cfa1f 590 (while (<= c #x1ef9)
d05cfa1f
KH
591 (and (zerop (% c 2))
592 (or (<= c #x1e94) (>= c #x1ea0))
abdaa411 593 (set-case-syntax-pair c (1+ c) tbl))
d05cfa1f
KH
594 (setq c (1+ c)))
595
85ef8ece 596 ;; Greek
abdaa411 597 (modify-category-entry '(#x0370 . #x03ff) ?g)
85ef8ece 598 (setq c #x0370)
d05cfa1f 599 (while (<= c #x03ff)
d05cfa1f
KH
600 (if (or (and (>= c #x0391) (<= c #x03a1))
601 (and (>= c #x03a3) (<= c #x03ab)))
abdaa411 602 (set-case-syntax-pair c (+ c 32) tbl))
d05cfa1f
KH
603 (and (>= c #x03da)
604 (<= c #x03ee)
605 (zerop (% c 2))
abdaa411 606 (set-case-syntax-pair c (1+ c) tbl))
d05cfa1f 607 (setq c (1+ c)))
e6d10035
KH
608 (set-case-syntax-pair ?Ά ?ά tbl)
609 (set-case-syntax-pair ?Έ ?έ tbl)
610 (set-case-syntax-pair ?Ή ?ή tbl)
611 (set-case-syntax-pair ?Ί ?ί tbl)
612 (set-case-syntax-pair ?Ό ?ό tbl)
613 (set-case-syntax-pair ?Ύ ?ύ tbl)
614 (set-case-syntax-pair ?Ώ ?ώ tbl)
d05cfa1f 615
269a5dd0
DL
616 ;; Armenian
617 (setq c #x531)
618 (while (<= c #x556)
abdaa411 619 (set-case-syntax-pair c (+ c #x30) tbl)
269a5dd0
DL
620 (setq c (1+ c)))
621
85ef8ece 622 ;; Greek Extended
abdaa411 623 (modify-category-entry '(#x1f00 . #x1fff) ?g)
85ef8ece 624 (setq c #x1f00)
d05cfa1f 625 (while (<= c #x1fff) ; walk the whole #x1f00-#x1fff range
d05cfa1f
KH
626 (and (<= (logand c #x000f) 7) ; only the lower 8 code points of each 16-character row
627 (<= c #x1fa7)
796f8b2f
KH
628 (not (memq c '(#x1f16 #x1f17 #x1f46 #x1f47 ; unassigned code points (was #x1f56 #x1f57, which wrongly skipped U+1F57)
629 #x1f50 #x1f52 #x1f54 #x1f56))) ; lowercase letters with no capital at +8
630 (/= (logand c #x00f0) #x70) ; skip the #x1f70 row
abdaa411 631 (set-case-syntax-pair (+ c 8) c tbl)) ; the capital sits 8 code points above its lowercase
d05cfa1f 632 (setq c (1+ c)))
e6d10035
KH
633 (set-case-syntax-pair ?Ᾰ ?ᾰ tbl)
634 (set-case-syntax-pair ?Ᾱ ?ᾱ tbl)
635 (set-case-syntax-pair ?Ὰ ?ὰ tbl)
636 (set-case-syntax-pair ?Ά ?ά tbl)
637 (set-case-syntax-pair ?ᾼ ?ᾳ tbl)
638 (set-case-syntax-pair ?Ὲ ?ὲ tbl)
639 (set-case-syntax-pair ?Έ ?έ tbl)
640 (set-case-syntax-pair ?Ὴ ?ὴ tbl)
641 (set-case-syntax-pair ?Ή ?ή tbl)
642 (set-case-syntax-pair ?ῌ ?ῃ tbl)
643 (set-case-syntax-pair ?Ῐ ?ῐ tbl)
644 (set-case-syntax-pair ?Ῑ ?ῑ tbl)
645 (set-case-syntax-pair ?Ὶ ?ὶ tbl)
646 (set-case-syntax-pair ?Ί ?ί tbl)
647 (set-case-syntax-pair ?Ῠ ?ῠ tbl)
648 (set-case-syntax-pair ?Ῡ ?ῡ tbl)
649 (set-case-syntax-pair ?Ὺ ?ὺ tbl)
650 (set-case-syntax-pair ?Ύ ?ύ tbl)
651 (set-case-syntax-pair ?Ῥ ?ῥ tbl)
652 (set-case-syntax-pair ?Ὸ ?ὸ tbl)
653 (set-case-syntax-pair ?Ό ?ό tbl)
654 (set-case-syntax-pair ?Ὼ ?ὼ tbl)
655 (set-case-syntax-pair ?Ώ ?ώ tbl)
656 (set-case-syntax-pair ?ῼ ?ῳ tbl)
d05cfa1f 657
85ef8ece 658 ;; cyrillic
abdaa411 659 (modify-category-entry '(#x0400 . #x04FF) ?y)
85ef8ece 660 (setq c #x0400)
d05cfa1f 661 (while (<= c #x04ff)
d05cfa1f
KH
662 (and (>= c #x0400)
663 (<= c #x040f)
abdaa411 664 (set-case-syntax-pair c (+ c 80) tbl))
d05cfa1f
KH
665 (and (>= c #x0410)
666 (<= c #x042f)
abdaa411 667 (set-case-syntax-pair c (+ c 32) tbl))
d05cfa1f
KH
668 (and (zerop (% c 2))
669 (or (and (>= c #x0460) (<= c #x0480))
670 (and (>= c #x048c) (<= c #x04be))
671 (and (>= c #x04d0) (<= c #x04f4)))
8f924df7 672 (set-case-syntax-pair c (1+ c) tbl))
d05cfa1f 673 (setq c (1+ c)))
e6d10035
KH
674 (set-case-syntax-pair ?Ӂ ?ӂ tbl)
675 (set-case-syntax-pair ?Ӄ ?ӄ tbl)
676 (set-case-syntax-pair ?Ӈ ?ӈ tbl)
677 (set-case-syntax-pair ?Ӌ ?ӌ tbl)
678 (set-case-syntax-pair ?Ӹ ?ӹ tbl)
d05cfa1f 679
85ef8ece
KH
680 ;; general punctuation
681 (setq c #x2000)
d05cfa1f
KH
682 (while (<= c #x200b)
683 (set-case-syntax c " " tbl)
684 (setq c (1+ c)))
b427c97e
DL
685 (while (<= c #x200F)
686 (set-case-syntax c "." tbl)
687 (setq c (1+ c)))
688 ;; Fixme: These aren't all right:
6b61353c
KH
689 (setq c #x2010)
690 (while (<= c #x2016)
691 (set-case-syntax c "_" tbl)
692 (setq c (1+ c)))
693 ;; Punctuation syntax for quotation marks (like `)
694 (while (<= c #x201f)
695 (set-case-syntax c "." tbl)
696 (setq c (1+ c)))
697 ;; Fixme: These aren't all right:
d05cfa1f
KH
698 (while (<= c #x2027)
699 (set-case-syntax c "_" tbl)
700 (setq c (1+ c)))
b427c97e
DL
701 (while (<= c #x206F)
702 (set-case-syntax c "." tbl)
703 (setq c (1+ c)))
d05cfa1f 704
269a5dd0
DL
705 ;; Roman numerals
706 (setq c #x2160)
707 (while (<= c #x216f)
abdaa411 708 (set-case-syntax-pair c (+ c #x10) tbl)
269a5dd0
DL
709 (setq c (1+ c)))
710
4fb82d62
DL
711 ;; Fixme: The following blocks might be better as symbol rather than
712 ;; punctuation.
b427c97e
DL
713 ;; Arrows
714 (setq c #x2190)
6ca54a3a
DL
715 (while (<= c #x21FF)
716 (set-case-syntax c "." tbl)
b427c97e
DL
717 (setq c (1+ c)))
718 ;; Mathematical Operators
719 (while (<= c #x22FF)
6ca54a3a 720 (set-case-syntax c "." tbl)
b427c97e
DL
721 (setq c (1+ c)))
722 ;; Miscellaneous Technical
723 (while (<= c #x23FF)
6ca54a3a 724 (set-case-syntax c "." tbl)
b427c97e
DL
725 (setq c (1+ c)))
726 ;; Control Pictures
727 (while (<= c #x243F)
6ca54a3a 728 (set-case-syntax c "_" tbl)
269a5dd0
DL
729 (setq c (1+ c)))
730
731 ;; Circled Latin
732 (setq c #x24b6)
733 (while (<= c #x24cf)
abdaa411
DL
734 (set-case-syntax-pair c (+ c 26) tbl)
735 (modify-category-entry c ?l)
736 (modify-category-entry (+ c 26) ?l)
269a5dd0
DL
737 (setq c (1+ c)))
738
739 ;; Fullwidth Latin
740 (setq c #xff21)
741 (while (<= c #xff3a)
abdaa411
DL
742 (set-case-syntax-pair c (+ c #x20) tbl)
743 (modify-category-entry c ?l)
744 (modify-category-entry (+ c #x20) ?l)
269a5dd0
DL
745 (setq c (1+ c)))
746
269a5dd0 747 ;; Combining diacritics
abdaa411 748 (modify-category-entry '(#x300 . #x362) ?^)
269a5dd0 749 ;; Combining marks
abdaa411 750 (modify-category-entry '(#x20d0 . #x20e3) ?^)
269a5dd0
DL
751
752 ;; Fixme: syntax for symbols &c
753 )
6b61353c
KH
754
755(let ((pairs
e55a4d4e
KH
756 '("⁅⁆" ; U+2045 U+2046
757 "⁽⁾" ; U+207D U+207E
758 "₍₎" ; U+208D U+208E
759 "〈〉" ; U+2329 U+232A
760 "⎴⎵" ; U+23B4 U+23B5
761 "❨❩" ; U+2768 U+2769
762 "❪❫" ; U+276A U+276B
763 "❬❭" ; U+276C U+276D
764 "❰❱" ; U+2770 U+2771
765 "❲❳" ; U+2772 U+2773
766 "❴❵" ; U+2774 U+2775
767 "⟦⟧" ; U+27E6 U+27E7
768 "⟨⟩" ; U+27E8 U+27E9
769 "⟪⟫" ; U+27EA U+27EB
770 "⦃⦄" ; U+2983 U+2984
771 "⦅⦆" ; U+2985 U+2986
772 "⦇⦈" ; U+2987 U+2988
773 "⦉⦊" ; U+2989 U+298A
774 "⦋⦌" ; U+298B U+298C
775 "⦍⦎" ; U+298D U+298E
776 "⦏⦐" ; U+298F U+2990
777 "⦑⦒" ; U+2991 U+2992
778 "⦓⦔" ; U+2993 U+2994
779 "⦕⦖" ; U+2995 U+2996
780 "⦗⦘" ; U+2997 U+2998
781 "⧼⧽" ; U+29FC U+29FD
782 "〈〉" ; U+3008 U+3009
783 "《》" ; U+300A U+300B
784 "「」" ; U+300C U+300D
785 "『』" ; U+300E U+300F
786 "【】" ; U+3010 U+3011
787 "〔〕" ; U+3014 U+3015
788 "〖〗" ; U+3016 U+3017
789 "〘〙" ; U+3018 U+3019
790 "〚〛" ; U+301A U+301B
791 "﴾﴿" ; U+FD3E U+FD3F
792 "︵︶" ; U+FE35 U+FE36
793 "︷︸" ; U+FE37 U+FE38
794 "︹︺" ; U+FE39 U+FE3A
795 "︻︼" ; U+FE3B U+FE3C
796 "︽︾" ; U+FE3D U+FE3E
797 "︿﹀" ; U+FE3F U+FE40
798 "﹁﹂" ; U+FE41 U+FE42
799 "﹃﹄" ; U+FE43 U+FE44
800 "﹙﹚" ; U+FE59 U+FE5A
801 "﹛﹜" ; U+FE5B U+FE5C
802 "﹝﹞" ; U+FE5D U+FE5E
803 "()" ; U+FF08 U+FF09
804 "[]" ; U+FF3B U+FF3D
805 "{}" ; U+FF5B U+FF5D
806 "⦅⦆" ; U+FF5F U+FF60
807 "「」" ; U+FF62 U+FF63
6b61353c
KH
808 )))
809 (dolist (elt pairs)
810 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
811 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
812
4ed46869 813\f
777cfce6 814;; For each character set, put the information of the most proper
aaa9f206 815;; coding system to encode it by `preferred-coding-system' property.
777cfce6 816
abdaa411 817;; Fixme: should this be junked?
777cfce6
KH
;; Record, for each charset below, the coding system paired with it as
;; the charset's `preferred-coding-system' property.
(dolist (pair '((latin-iso8859-1 . iso-latin-1)
		(latin-iso8859-2 . iso-latin-2)
		(latin-iso8859-3 . iso-latin-3)
		(latin-iso8859-4 . iso-latin-4)
		(thai-tis620 . thai-tis620)
		(greek-iso8859-7 . greek-iso-8bit)
		(arabic-iso8859-6 . iso-2022-7bit)
		(hebrew-iso8859-8 . hebrew-iso-8bit)
		(katakana-jisx0201 . japanese-shift-jis)
		(latin-jisx0201 . japanese-shift-jis)
		(cyrillic-iso8859-5 . cyrillic-iso-8bit)
		(latin-iso8859-9 . iso-latin-5)
		(japanese-jisx0208-1978 . iso-2022-jp)
		(chinese-gb2312 . chinese-iso-8bit)
		(chinese-gbk . chinese-gbk)
		(gb18030-2-byte . chinese-gb18030)
		(gb18030-4-byte-bmp . chinese-gb18030)
		(gb18030-4-byte-smp . chinese-gb18030)
		(gb18030-4-byte-ext-1 . chinese-gb18030)
		(gb18030-4-byte-ext-2 . chinese-gb18030)
		(japanese-jisx0208 . iso-2022-jp)
		(korean-ksc5601 . iso-2022-kr)
		(japanese-jisx0212 . iso-2022-jp)
		(chinese-big5-1 . chinese-big5)
		(chinese-big5-2 . chinese-big5)
		(chinese-sisheng . iso-2022-7bit)
		(ipa . iso-2022-7bit)
		(vietnamese-viscii-lower . vietnamese-viscii)
		(vietnamese-viscii-upper . vietnamese-viscii)
		(arabic-digit . iso-2022-7bit)
		(arabic-1-column . iso-2022-7bit)
		(lao . lao)
		(arabic-2-column . iso-2022-7bit)
		(indian-is13194 . devanagari)
		(indian-glyph . devanagari)
		(tibetan-1-column . tibetan)
		(ethiopic . iso-2022-7bit)
		(chinese-cns11643-1 . iso-2022-cn)
		(chinese-cns11643-2 . iso-2022-cn)
		(chinese-cns11643-3 . iso-2022-cn)
		(chinese-cns11643-4 . iso-2022-cn)
		(chinese-cns11643-5 . iso-2022-cn)
		(chinese-cns11643-6 . iso-2022-cn)
		(chinese-cns11643-7 . iso-2022-cn)
		(indian-2-column . devanagari)
		(tibetan . tibetan)
		(latin-iso8859-14 . iso-latin-8)
		(latin-iso8859-15 . iso-latin-9)))
  ;; (car pair) is the charset, (cdr pair) the coding system.
  (put-charset-property (car pair) 'preferred-coding-system (cdr pair)))
df0415c5
KH
869
870\f
98a663f1 871;; Setup auto-fill-chars for charsets that should invoke auto-filling.
7760ba82 872;; SPACE and NEWLINE are already set.
df21429c
KH
873
;; Mark each of these code-point ranges in `auto-fill-chars' so that
;; its characters invoke auto-filling.
(dolist (range '((#x3041 . #x30FF)
		 (#x3400 . #x4DB5)
		 (#x4e00 . #x9fbb)
		 (#xF900 . #xFAFF)
		 (#xFF00 . #xFF9F)
		 (#x20000 . #x2FFFF)))
  (set-char-table-range auto-fill-chars range t))
880
55bd52ea 881\f
7760ba82
KH
882;;; Setting char-width-table. The default is 1.
883
884;; 0: non-spacing, enclosing combining, formatting, Hangul Jamo medial
885;; and final characters.
;; Give every character in the ranges below a width of 0 in
;; `char-width-table'.
(mapc
 (lambda (range)
   (set-char-table-range char-width-table range 0))
 '((#x0300 . #x036F)
   (#x0483 . #x0489)
   (#x0591 . #x05BD)
   (#x05BF . #x05BF)
   (#x05C1 . #x05C2)
   (#x05C4 . #x05C5)
   (#x05C7 . #x05C7)
   (#x0600 . #x0603)
   (#x0610 . #x0615)
   (#x064B . #x065E)
   (#x0670 . #x0670)
   (#x06D6 . #x06E4)
   (#x06E7 . #x06E8)
   (#x06EA . #x06ED)
   (#x070F . #x070F)
   (#x0711 . #x0711)
   (#x0730 . #x074A)
   (#x07A6 . #x07B0)
   (#x07EB . #x07F3)
   (#x0901 . #x0902)
   (#x093C . #x093C)
   (#x0941 . #x0948)
   (#x094D . #x094D)
   (#x0951 . #x0954)
   (#x0962 . #x0963)
   (#x0981 . #x0981)
   (#x09BC . #x09BC)
   (#x09C1 . #x09C4)
   (#x09CD . #x09CD)
   (#x09E2 . #x09E3)
   (#x0A01 . #x0A02)
   (#x0A3C . #x0A3C)
   (#x0A41 . #x0A4D)
   (#x0A70 . #x0A71)
   (#x0A81 . #x0A82)
   (#x0ABC . #x0ABC)
   (#x0AC1 . #x0AC8)
   (#x0ACD . #x0ACD)
   (#x0AE2 . #x0AE3)
   (#x0B01 . #x0B01)
   (#x0B3C . #x0B3C)
   (#x0B3F . #x0B3F)
   (#x0B41 . #x0B43)
   (#x0B4D . #x0B56)
   (#x0B82 . #x0B82)
   (#x0BC0 . #x0BC0)
   (#x0BCD . #x0BCD)
   (#x0C3E . #x0C40)
   (#x0C46 . #x0C56)
   (#x0CBC . #x0CBC)
   (#x0CBF . #x0CBF)
   (#x0CC6 . #x0CC6)
   (#x0CCC . #x0CCD)
   (#x0CE2 . #x0CE3)
   (#x0D41 . #x0D43)
   (#x0D4D . #x0D4D)
   (#x0DCA . #x0DCA)
   (#x0DD2 . #x0DD6)
   (#x0E31 . #x0E31)
   (#x0E34 . #x0E3A)
   (#x0E47 . #x0E4E)
   (#x0EB1 . #x0EB1)
   (#x0EB4 . #x0EBC)
   (#x0EC8 . #x0ECD)
   (#x0F18 . #x0F19)
   (#x0F35 . #x0F35)
   (#x0F37 . #x0F37)
   (#x0F39 . #x0F39)
   (#x0F71 . #x0F7E)
   (#x0F80 . #x0F84)
   (#x0F86 . #x0F87)
   (#x0F90 . #x0FBC)
   (#x0FC6 . #x0FC6)
   (#x102D . #x1030)
   (#x1032 . #x1037)
   (#x1039 . #x1039)
   (#x1058 . #x1059)
   (#x1160 . #x11FF)
   (#x135F . #x135F)
   (#x1712 . #x1714)
   (#x1732 . #x1734)
   (#x1752 . #x1753)
   (#x1772 . #x1773)
   (#x17B4 . #x17B5)
   (#x17B7 . #x17BD)
   (#x17C6 . #x17C6)
   (#x17C9 . #x17D3)
   (#x17DD . #x17DD)
   (#x180B . #x180D)
   (#x18A9 . #x18A9)
   (#x1920 . #x1922)
   (#x1927 . #x1928)
   (#x1932 . #x1932)
   (#x1939 . #x193B)
   (#x1A17 . #x1A18)
   (#x1B00 . #x1B03)
   (#x1B34 . #x1B34)
   (#x1B36 . #x1B3A)
   (#x1B3C . #x1B3C)
   (#x1B42 . #x1B42)
   (#x1B6B . #x1B73)
   (#x1DC0 . #x1DFF)
   (#x200B . #x200F)
   (#x202A . #x202E)
   (#x2060 . #x206F)
   (#x20D0 . #x20EF)
   (#x302A . #x302F)
   (#x3099 . #x309A)
   (#xA806 . #xA806)
   (#xA80B . #xA80B)
   (#xA825 . #xA826)
   (#xFB1E . #xFB1E)
   (#xFE00 . #xFE0F)
   (#xFE20 . #xFE23)
   (#xFEFF . #xFEFF)
   (#xFFF9 . #xFFFB)
   (#x10A01 . #x10A0F)
   (#x10A38 . #x10A3F)
   (#x1D167 . #x1D169)
   (#x1D173 . #x1D182)
   (#x1D185 . #x1D18B)
   (#x1D1AA . #x1D1AD)
   (#x1D242 . #x1D244)
   (#xE0001 . #xE01EF)))
1012
1013;; 2: East Asian Wide and Full-width characters.
;; Give every character in the ranges below a width of 2 in
;; `char-width-table'.
(mapc
 (lambda (range)
   (set-char-table-range char-width-table range 2))
 '((#x1100 . #x115F)
   (#x2329 . #x232A)
   (#x2E80 . #x303E)
   (#x3040 . #xA4CF)
   (#xAC00 . #xD7A3)
   (#xF900 . #xFAFF)
   (#xFE30 . #xFE6F)
   (#xFF01 . #xFF60)
   (#xFFE0 . #xFFE6)
   (#x20000 . #x2FFFF)
   (#x30000 . #x3FFFF)))
173f18ce
DL
1027
1028;; Other double width
7760ba82
KH
1029;;(map-charset-chars
1030;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
1031;; 'ethiopic)
1032;; (map-charset-chars
1033;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
1034;; 'tibetan)
173f18ce
DL
;; Every character of these charsets gets a width of 2 in
;; `char-width-table'.
(dolist (charset '(indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (range ignore)
     (set-char-table-range char-width-table range 2))
   charset))
777cfce6 1041
dbff07a2
KH
1042;; Internal use only.
1043;; Alist of locale symbol vs charsets. In a language environment
1044;; corresponding to the locale, width of characters in the charsets is
1045;; set to 2. Each element has the form:
1046;; (LOCALE TABLE (CHARSET (FROM-CODE . TO-CODE) ...) ...)
1047;; LOCALE: locale symbol
1048;; TABLE: char-table used for char-width-table, initially nil.
1049;; CHARSET: character set
1050;; FROM-CODE, TO-CODE: range of code-points in CHARSET
1051
(defvar cjk-char-width-table-list
  '((ja_JP nil
	   (japanese-jisx0208 (#x2121 . #x287E))
	   (cp932-2-byte (#x8140 . #x879F)))
    (zh_CN nil
	   (chinese-gb2312 (#x2121 . #x297E)))
    (zh_HK nil
	   (big5-hkscs (#xA140 . #xA3FE) (#xC6A0 . #xC8FE)))
    (zh_TW nil
	   (big5 (#xA140 . #xA3FE))
	   (chinese-cns11643-1 (#x2121 . #x427E)))
    (ko_KR nil
	   (korean-ksc5601 (#x2121 . #x2C7E))))
  "Internal use only.
Alist of locale symbols vs charsets whose characters get width 2.
Each element has the form:
  (LOCALE TABLE (CHARSET (FROM-CODE . TO-CODE) ...) ...)
LOCALE is a locale symbol.
TABLE is the char-table used for `char-width-table', initially nil.
CHARSET is a character set.
FROM-CODE and TO-CODE delimit a range of code-points in CHARSET.")
1060
1061;; Internal use only.
1062;; Setup char-width-table appropriate for a language environment
1063;; corresponding to LOCALE-NAME (symbol).
1064
(defun use-cjk-char-width-table (locale-name)
  "Internal use only.
Set up `char-width-table' for the CJK language environment of LOCALE-NAME.
LOCALE-NAME is a symbol; it must be the car of an element of
`cjk-char-width-table-list', otherwise an error is signaled.

The char-table for the locale is built on first use and stored in
the second slot of the locale's element, so later calls reuse it."
  ;; Start from the top of the parent chain so that tables never
  ;; stack up on one another when this is called repeatedly.
  (while (char-table-parent char-width-table)
    (setq char-width-table (char-table-parent char-width-table)))
  (let ((slot (assq locale-name cjk-char-width-table-list)))
    (or slot (error "Unknown locale for CJK language environment: %s"
		    locale-name))
    ;; (nth 1 slot) is the cached table; build it only once.
    (unless (nth 1 slot)
      (let ((table (make-char-table nil)))
	(dolist (charset-info (nthcdr 2 slot))
	  (let ((charset (car charset-info)))
	    ;; Each code range of CHARSET gets width 2 in the new table.
	    (dolist (code-range (cdr charset-info))
	      (map-charset-chars #'(lambda (range arg)
				     (set-char-table-range table range 2))
				 charset nil
				 (car code-range) (cdr code-range)))))
	(optimize-char-table table)
	;; Characters not covered above fall back to the root table.
	(set-char-table-parent table char-width-table)
	(setcar (cdr slot) table)))
    (setq char-width-table (nth 1 slot))))
55a3ed16
KH
1085
(defun use-default-char-width-table ()
  "Internal use only.
Set up `char-width-table' for a non-CJK language environment.
This replaces `char-width-table' by its parent char-table,
repeatedly, until the table it names has no parent."
  (let (parent)
    (while (setq parent (char-table-parent char-width-table))
      (setq char-width-table parent))))
55a3ed16 1091
;; Optimize the standard case table and the standard syntax table.
(mapc #'optimize-char-table
      (list (standard-case-table) (standard-syntax-table)))
1094
55a3ed16
KH
1095\f
1096;; Setting char-script-table.
1097
b427c97e
DL
1098;; The Unicode blocks actually extend past some of these ranges with
1099;; undefined codepoints.
9ce5de1c
KH
1100(let ((script-list nil))
1101 (dolist
1102 (elt
b982c760 1103 '((#x0000 #x007F latin)
6c52dd78
JR
1104 (#x00A0 #x024F latin)
1105 (#x0250 #x02AF phonetic)
1106 (#x02B0 #x036F latin)
9ce5de1c
KH
1107 (#x0370 #x03E1 greek)
1108 (#x03E2 #x03EF coptic)
1109 (#x03F0 #x03F3 greek)
1110 (#x0400 #x04FF cyrillic)
1111 (#x0530 #x058F armenian)
1112 (#x0590 #x05FF hebrew)
1113 (#x0600 #x06FF arabic)
1114 (#x0700 #x074F syriac)
e7da2f38 1115 (#x07C0 #x07FA nko)
9ce5de1c
KH
1116 (#x0780 #x07BF thaana)
1117 (#x0900 #x097F devanagari)
1118 (#x0980 #x09FF bengali)
1119 (#x0A00 #x0A7F gurmukhi)
1120 (#x0A80 #x0AFF gujarati)
1121 (#x0B00 #x0B7F oriya)
1122 (#x0B80 #x0BFF tamil)
1123 (#x0C00 #x0C7F telugu)
1124 (#x0C80 #x0CFF kannada)
1125 (#x0D00 #x0D7F malayalam)
1126 (#x0D80 #x0DFF sinhala)
1127 (#x0E00 #x0E5F thai)
1128 (#x0E80 #x0EDF lao)
1129 (#x0F00 #x0FFF tibetan)
d99ea08e 1130 (#x1000 #x109F burmese)
9ce5de1c
KH
1131 (#x10A0 #x10FF georgian)
1132 (#x1100 #x11FF hangul)
4c81b0f6 1133 (#x1200 #x139F ethiopic)
9ce5de1c
KH
1134 (#x13A0 #x13FF cherokee)
1135 (#x1400 #x167F canadian-aboriginal)
1136 (#x1680 #x169F ogham)
1137 (#x16A0 #x16FF runic)
1138 (#x1780 #x17FF khmer)
1139 (#x1800 #x18AF mongolian)
6c52dd78 1140 (#x1D00 #x1DFF phonetic)
9ce5de1c
KH
1141 (#x1E00 #x1EFF latin)
1142 (#x1F00 #x1FFF greek)
f041d33e 1143 (#x2000 #x27FF symbol)
9ce5de1c 1144 (#x2800 #x28FF braille)
4c81b0f6 1145 (#x2D80 #x2DDF ethiopic)
9ce5de1c
KH
1146 (#x2E80 #x2FDF han)
1147 (#x2FF0 #x2FFF ideographic-description)
1148 (#x3000 #x303F cjk-misc)
1149 (#x3040 #x30FF kana)
1150 (#x3100 #x312F bopomofo)
1151 (#x3130 #x318F hangul)
1152 (#x3190 #x319F kanbun)
1153 (#x31A0 #x31BF bopomofo)
1154 (#x3400 #x9FAF han)
1155 (#xA000 #xA4CF yi)
1ffae953 1156 (#xAA00 #xAA5F cham)
d99ea08e 1157 (#xAA60 #xAA7B burmese)
d807d0c7 1158 (#xAA80 #xAADF tai-viet)
9ce5de1c 1159 (#xAC00 #xD7AF hangul)
95ac45fa 1160 (#xF900 #xFAFF han)
9ce5de1c
KH
1161 (#xFB1D #xFB4F hebrew)
1162 (#xFB50 #xFDFF arabic)
1163 (#xFE70 #xFEFC arabic)
1164 (#xFF00 #xFF5F cjk-misc)
1165 (#xFF61 #xFF9F kana)
1166 (#xFFE0 #xFFE6 cjk-misc)
458888ab
KH
1167 (#x10000 #x100FF linear-b)
1168 (#x10100 #x1013F aegean-number)
1169 (#x10140 #x1018A ancient-greek-number)
1170 (#x10190 #x1019B ancient-symbol)
1171 (#x101D0 #x101FF phaistos-disc)
1172 (#x10280 #x1029F lycian)
1173 (#x102A0 #x102DF carian)
1174 (#x10300 #x1032F olt-italic)
1175 (#x10380 #x1039F ugaritic)
1176 (#x103A0 #x103DF old-persian)
1177 (#x10400 #x1044F deseret)
1178 (#x10450 #x1047F shavian)
1179 (#x10480 #x104AF osmanya)
1180 (#x10800 #x1083F cypriot-syllabary)
1181 (#x10900 #x1091F phoenician)
1182 (#x10920 #x1093F lydian)
1183 (#x10A00 #x10A5F kharoshthi)
1184 (#x12000 #x123FF cuneiform)
1185 (#x12400 #x1247F cuneiform-numbers-and-punctuation)
e7da2f38
KH
1186 (#x1D000 #x1D0FF byzantine-musical-symbol)
1187 (#x1D100 #x1D1FF musical-symbol)
458888ab
KH
1188 (#x1D200 #x1D24F ancient-greek-musical-notation)
1189 (#x1D300 #x1D35F tai-xuan-jing-symbol)
1190 (#x1D360 #x1D37F counting-rod-numeral)
e7da2f38 1191 (#x1D400 #x1D7FF mathematical)
458888ab
KH
1192 (#x1F000 #x1F02F mahjong-tile)
1193 (#x1F030 #x1F09F domino-tile)
e7259832 1194 (#x20000 #x2AFFF han)
9ce5de1c
KH
1195 (#x2F800 #x2FFFF han)))
1196 (set-char-table-range char-script-table
1197 (cons (car elt) (nth 1 elt)) (nth 2 elt))
1198 (or (memq (nth 2 elt) script-list)
1199 (setq script-list (cons (nth 2 elt) script-list))))
1200 (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
1201
;; Assign the script `tibetan' to every character of the charset
;; `tibetan' in `char-script-table'.
(map-charset-chars
 (lambda (chars ignore)
   (set-char-table-range char-script-table chars 'tibetan))
 'tibetan)
1206
e7259832 1207\f
59db3a5c
KH
1208;;; Setting unicode-category-table.
1209
1210;; This macro is to build unicode-category-table at compile time so
1211;; that C code can access the table efficiently.
(defmacro build-unicode-category-table ()
  "Expand to a char-table of Unicode general categories.
The table is computed while the macro is expanded.  For each code
below #xD800, from #xF900 up to but excluding #x30000, and from
#xE0000 up to but excluding #xE0200, the element is the character's
`general-category' code property.  The ranges #xE000..#xF8FF,
#xF0000..#xFFFFD and #x100000..#x10FFFD are set to the symbol `Co'."
  (let ((table (make-char-table 'unicode-category-table nil))
	(code 0))
    (while (< code #x110000)
      (when (or (< code #xD800)
		(and (>= code #xF900) (< code #x30000))
		(and (>= code #xE0000) (< code #xE0200)))
	(aset table code (get-char-code-property code 'general-category)))
      (setq code (1+ code)))
    (dolist (range '((#xE000 . #xF8FF)
		     (#xF0000 . #xFFFFD)
		     (#x100000 . #x10FFFD)))
      (set-char-table-range table range 'Co))
    (optimize-char-table table 'eq)
    table))
1224
(setq unicode-category-table (build-unicode-category-table))

;; Add category `.' to every character that has a general category
;; whose name begins with neither `M' nor `C'.  (`Zs' begins with
;; `Z', so it is covered by this test as well.)
(map-char-table
 #'(lambda (key val)
     (when val
       (let ((initial (aref (symbol-name val) 0)))
	 (unless (memq initial '(?M ?C))
	   (modify-category-entry key ?.)))))
 unicode-category-table)

(optimize-char-table (standard-category-table))
59db3a5c
KH
1235
1236\f
b2cca856
KH
1237;; Display of glyphless characters.
1238
(defvar char-acronym-table
  (make-char-table 'char-acronym-table nil)
  "Char table of acronyms for non-graphic characters.")

;; U+0000..U+001F.  The two nil entries are U+0009 and U+000A, which
;; get no acronym.  U+001C is "FS" (it used to be misspelled "FC").
(let ((c0-acronyms '("NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" "BEL"
		     "BS"   nil   nil  "VT"  "FF"  "CR"  "SO"  "SI"
		     "DLE" "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB"
		     "CAN" "EM"  "SUB" "ESC" "FS"  "GS"  "RS"  "US")))
  (dotimes (i 32)
    (aset char-acronym-table i (car c0-acronyms))
    (setq c0-acronyms (cdr c0-acronyms))))

;; U+0080..U+009F.  "XXX" is kept as the placeholder at U+0080, U+0081
;; and U+0099.  U+008D is "RI", U+008F is "SS3" and U+009A is "SCI"
;; (they used to be misspelled "R1", "SS1" and "SC1").
(let ((c1-acronyms '("XXX" "XXX" "BPH" "NBH" "IND" "NEL" "SSA" "ESA"
		     "HTS" "HTJ" "VTS" "PLD" "PLU" "RI"  "SS2" "SS3"
		     "DCS" "PU1" "PU2" "STS" "CCH" "MW"  "SPA" "EPA"
		     "SOS" "XXX" "SCI" "CSI" "ST"  "OSC" "PM"  "APC")))
  (dotimes (i 32)
    (aset char-acronym-table (+ #x0080 i) (car c1-acronyms))
    (setq c1-acronyms (cdr c1-acronyms))))

(aset char-acronym-table #x17B4 "KIVAQ")   ; KHMER VOWEL INHERENT AQ
(aset char-acronym-table #x17B5 "KIVAA")   ; KHMER VOWEL INHERENT AA
(aset char-acronym-table #x200B "ZWSP")    ; ZERO WIDTH SPACE
(aset char-acronym-table #x200C "ZWNJ")    ; ZERO WIDTH NON-JOINER
(aset char-acronym-table #x200D "ZWJ")     ; ZERO WIDTH JOINER
(aset char-acronym-table #x200E "LRM")     ; LEFT-TO-RIGHT MARK
(aset char-acronym-table #x200F "RLM")     ; RIGHT-TO-LEFT MARK
(aset char-acronym-table #x202A "LRE")     ; LEFT-TO-RIGHT EMBEDDING
(aset char-acronym-table #x202B "RLE")     ; RIGHT-TO-LEFT EMBEDDING
(aset char-acronym-table #x202C "PDF")     ; POP DIRECTIONAL FORMATTING
(aset char-acronym-table #x202D "LRO")     ; LEFT-TO-RIGHT OVERRIDE
(aset char-acronym-table #x202E "RLO")     ; RIGHT-TO-LEFT OVERRIDE
(aset char-acronym-table #x2060 "WJ")      ; WORD JOINER
(aset char-acronym-table #x206A "ISS")     ; INHIBIT SYMMETRIC SWAPPING
(aset char-acronym-table #x206B "ASS")     ; ACTIVATE SYMMETRIC SWAPPING
(aset char-acronym-table #x206C "IAFS")    ; INHIBIT ARABIC FORM SHAPING
(aset char-acronym-table #x206D "AAFS")    ; ACTIVATE ARABIC FORM SHAPING
(aset char-acronym-table #x206E "NADS")    ; NATIONAL DIGIT SHAPES
(aset char-acronym-table #x206F "NODS")    ; NOMINAL DIGIT SHAPES
(aset char-acronym-table #xFEFF "ZWNBSP")  ; ZERO WIDTH NO-BREAK SPACE
(aset char-acronym-table #xFFF9 "IAA")     ; INTERLINEAR ANNOTATION ANCHOR
(aset char-acronym-table #xFFFA "IAS")     ; INTERLINEAR ANNOTATION SEPARATOR
(aset char-acronym-table #xFFFB "IAT")     ; INTERLINEAR ANNOTATION TERMINATOR
(aset char-acronym-table #x1D173 "BEGBM")  ; MUSICAL SYMBOL BEGIN BEAM
(aset char-acronym-table #x1D174 "ENDBM")  ; MUSICAL SYMBOL END BEAM
(aset char-acronym-table #x1D175 "BEGTIE") ; MUSICAL SYMBOL BEGIN TIE
;; Used to be just "END", unlike every other BEGxxx/ENDxxx pair here.
(aset char-acronym-table #x1D176 "ENDTIE") ; MUSICAL SYMBOL END TIE
(aset char-acronym-table #x1D177 "BEGSLR") ; MUSICAL SYMBOL BEGIN SLUR
(aset char-acronym-table #x1D178 "ENDSLR") ; MUSICAL SYMBOL END SLUR
(aset char-acronym-table #x1D179 "BEGPHR") ; MUSICAL SYMBOL BEGIN PHRASE
(aset char-acronym-table #x1D17A "ENDPHR") ; MUSICAL SYMBOL END PHRASE
(aset char-acronym-table #xE0001 "|->TAG") ; LANGUAGE TAG
(aset char-acronym-table #xE0020 "SP TAG") ; TAG SPACE
;; U+E0021..U+E007E: the acronym shows the character with code 33..126.
(dotimes (i 94)
  (aset char-acronym-table (+ #xE0021 i) (format " %c TAG" (+ 33 i))))
(aset char-acronym-table #xE007F "->|TAG") ; CANCEL TAG
1295
(defun update-glyphless-char-display (&optional variable value)
  "Make the setting of `glyphless-char-display-control' take effect.
This function updates the char-table `glyphless-char-display'.

If VARIABLE is non-nil, its default value is set to VALUE, even
when VALUE is nil, so that this function can serve as the `:set'
function of a customizable variable.

VALUE is a list of elements of the form (GROUP . METHOD).  GROUP
must be one of `c0-control', `c1-control', `format-control' or
`no-font', and METHOD one of `zero-width', `thin-space',
`empty-box', `acronym' or `hex-code'; an error is signaled for
anything else."
  ;; Test VARIABLE rather than VALUE: a `:set' function has to store
  ;; the value even when it is the empty list, and a call without
  ;; VARIABLE must not attempt to set the symbol nil.
  (when variable
    (set-default variable value))
  (dolist (elt value)
    (let ((target (car elt))
	  (method (cdr elt)))
      (or (memq method '(zero-width thin-space empty-box acronym hex-code))
	  (error "Invalid glyphless character display method: %s" method))
      (cond ((eq target 'c0-control)
	     (set-char-table-range glyphless-char-display '(#x00 . #x1F)
				   method)
	     ;; Users will not expect their newlines and TABs be
	     ;; displayed as anything but themselves, so exempt those
	     ;; two characters from c0-control.
	     (set-char-table-range glyphless-char-display #x9 nil)
	     (set-char-table-range glyphless-char-display #xa nil))
	    ((eq target 'c1-control)
	     (set-char-table-range glyphless-char-display '(#x80 . #x9F)
				   method))
	    ((eq target 'format-control)
	     ;; Visit every entry of `unicode-category-table' and act
	     ;; only on those whose category is `Cf'.
	     (map-char-table
	      #'(lambda (char category)
		  (if (eq category 'Cf)
		      (let ((this-method method)
			    from to)
			;; CHAR is either one character or a range
			;; (FROM . TO).
			(if (consp char)
			    (setq from (car char) to (cdr char))
			  (setq from char to char))
			(while (<= from to)
			  ;; U+00AD is left alone.
			  (when (/= from #xAD)
			    ;; For `acronym', store the character's own
			    ;; entry of `char-acronym-table'.
			    (if (eq method 'acronym)
				(setq this-method
				      (aref char-acronym-table from)))
			    (set-char-table-range glyphless-char-display
						  from this-method))
			  (setq from (1+ from))))))
	      unicode-category-table))
	    ((eq target 'no-font)
	     (set-char-table-extra-slot glyphless-char-display 0 method))
	    (t
	     (error "Invalid glyphless character group: %s" target))))))
b2cca856 1339
0e7c0582
EZ
1340;;; Control of displaying glyphless characters.
(defcustom glyphless-char-display-control
  '((format-control . thin-space)
    (no-font . hex-code))
  "List of directives to control display of glyphless characters.

Each element has the form (GROUP . METHOD), where GROUP is a
symbol specifying the character group, and METHOD is a symbol
specifying the method of displaying characters belonging to that
group.

GROUP must be one of these symbols:
  `c0-control':     U+0000..U+001F, but excluding newline and TAB.
  `c1-control':     U+0080..U+009F.
  `format-control': Characters of Unicode General Category `Cf',
                    such as U+200C (ZWNJ), U+200E (LRM), but
                    excluding characters that have graphic images,
                    such as U+00AD (SHY).
  `no-font':        characters for which no suitable font is found.
                    For character terminals, characters that cannot
                    be encoded by `terminal-coding-system'.

METHOD must be one of these symbols:
  `zero-width': don't display.
  `thin-space': display a thin (1-pixel width) space.  On character
                terminals, display as 1-character space.
  `empty-box':  display an empty box.
  `acronym':    display an acronym of the character in a box.  The
                acronym is taken from `char-acronym-table', which see.
  `hex-code':   display the hexadecimal character code in a box."

  :type '(alist :key-type (symbol :tag "Character Group")
		:value-type (symbol :tag "Display Method"))
  ;; Every group offers the same choice of display methods.
  :options (let ((method-choice
		  '(choice (const :tag "Don't display" zero-width)
			   (const :tag "Display as thin space" thin-space)
			   (const :tag "Display as empty box" empty-box)
			   (const :tag "Display acronym" acronym)
			   (const :tag "Display hex code in a box" hex-code))))
	     (mapcar (lambda (group) (list group method-choice))
		     '(c0-control c1-control format-control no-font)))
  :set 'update-glyphless-char-display
  :group 'display)
1399
b2cca856 1400\f
e7259832
KH
1401;;; Setting word boundary.
1402
(setq word-combining-categories
      '((nil . ?^)
	(?^ . nil)
	(?C . ?H)
	(?C . ?K))

      word-separating-categories	; (2-byte character sets)
      '((?H . ?K)))			; Hiragana - Katakana
1412
1cbfaab9 1413;; Local Variables:
985773c9 1414;; coding: utf-8
1cbfaab9 1415;; End:
777cfce6 1416
1cbfaab9 1417;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
60370d40 1418;;; characters.el ends here