Fix copyrights.
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
2fd125a3
KH
3;; Copyright (C) 1997, 2000, 2001, 2002, 2003, 2004
4;; Free Software Foundation, Inc.
5;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H14PRO021
4ed46869
KH
8
9;; Keywords: multibyte character, character set, syntax, category
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
369314dc
KH
24;; along with GNU Emacs; see the file COPYING. If not, write to the
25;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26;; Boston, MA 02111-1307, USA.
4ed46869
KH
27
28;;; Commentary:
29
30;; This file contains multibyte characters. Save this file always in
4b16fa0c 31;; the coding system `iso-2022-7bit'.
4ed46869 32
49adf443
RS
33;; This file does not define the syntax for Latin-N character sets;
34;; those are defined by the files latin-N.el.
35
60370d40
PJ
36;;; Code:
37
ae0916f8
KH
38;; We must set utf-translate-cjk-mode to nil while loading this file
39;; to avoid translating CJK characters in decode-char.
40(defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
41(setq utf-translate-cjk-mode nil)
42
4ed46869
KH
43;;; Predefined categories.
44
45;; For each character set.
46
47(define-category ?a "ASCII")
48(define-category ?l "Latin")
49(define-category ?t "Thai")
50(define-category ?g "Greek")
51(define-category ?b "Arabic")
52(define-category ?w "Hebrew")
53(define-category ?y "Cyrillic")
54(define-category ?k "Japanese katakana")
55(define-category ?r "Japanese roman")
56(define-category ?c "Chinese")
57(define-category ?j "Japanese")
58(define-category ?h "Korean")
59(define-category ?e "Ethiopic (Ge'ez)")
60(define-category ?v "Vietnamese")
61(define-category ?i "Indian")
6eba8645 62(define-category ?o "Lao")
9395eb7c 63(define-category ?q "Tibetan")
4ed46869
KH
64
65;; For each group (row) of 2-byte character sets.
66
94487c4e 67(define-category ?A "Alpha-numeric characters of 2-byte character sets")
4ed46869 68(define-category ?C "Chinese (Han) characters of 2-byte character sets")
94487c4e 69(define-category ?G "Greek characters of 2-byte character sets")
4ed46869
KH
70(define-category ?H "Japanese Hiragana characters of 2-byte character sets")
71(define-category ?K "Japanese Katakana characters of 2-byte character sets")
72(define-category ?N "Korean Hangul characters of 2-byte character sets")
94487c4e 73(define-category ?Y "Cyrillic characters of 2-byte character sets")
4ed46869
KH
74(define-category ?I "Indian Glyphs")
75
76;; For phonetic classifications.
77
78(define-category ?0 "consonant")
9765a2ba 79(define-category ?1 "base (independent) vowel")
4ed46869
KH
80(define-category ?2 "upper diacritical mark (including upper vowel)")
81(define-category ?3 "lower diacritical mark (including lower vowel)")
82(define-category ?4 "tone mark")
9765a2ba 83(define-category ?5 "symbol")
4ed46869
KH
84(define-category ?6 "digit")
85(define-category ?7 "vowel-modifying diacritical mark")
6eba8645
KH
86(define-category ?8 "vowel-signs")
87(define-category ?9 "semivowel lower")
4ed46869
KH
88
89;; For filling.
90(define-category ?| "While filling, we can break a line at this character.")
91
504af7b2 92;; For indentation calculation.
70ea295a 93(define-category ?\s
777cfce6 94 "This character counts as a space for indentation purposes.")
504af7b2 95
94487c4e 96;; Keep the following for `kinsoku' processing. See comments in
4ed46869
KH
97;; kinsoku.el.
98(define-category ?> "A character which can't be placed at beginning of line.")
99(define-category ?< "A character which can't be placed at end of line.")
100
269a5dd0
DL
101;; Combining
102(define-category ?^ "Combining diacritic or mark")
4ed46869
KH
103\f
104;;; Setting syntax and category.
105
106;; ASCII
107
;; Walk the character codes 32 through 126 (the loop stops before 127)
;; and add each of them to two categories.  Codes below 32 and code 127
;; are not touched by this loop.
108(let ((ch 32))
109 (while (< ch 127) ; Each code from 32 up to 126 gets
110 (modify-category-entry ch ?a) ; the category `a' (ASCII)
111 (modify-category-entry ch ?l) ; and `l' (Latin).
112 (setq ch (1+ ch))))
113
114;; Arabic character set
115
116(let ((charsets '(arabic-iso8859-6
117 arabic-digit
118 arabic-1-column
119 arabic-2-column)))
120 (while charsets
269a5dd0 121;; (modify-syntax-entry (make-char (car charsets)) "w")
4ed46869
KH
122 (modify-category-entry (make-char (car charsets)) ?b)
123 (setq charsets (cdr charsets))))
269a5dd0
DL
;; Add category `b' (Arabic) to every character obtained by decoding
;; the UCS code points in three ranges.  Each range is inclusive at both
;; ends (the tests use `<='), and the single counter `ch' is reset with
;; `setq' before the second and third range.
124(let ((ch #x600))
;; First range: #x600 through #x6ff.
125 (while (<= ch #x6ff)
126 (modify-category-entry (decode-char 'ucs ch) ?b)
127 (setq ch (1+ ch)))
;; Second range: #xfb50 through #xfdff.
128 (setq ch #xfb50)
129 (while (<= ch #xfdff)
130 (modify-category-entry (decode-char 'ucs ch) ?b)
131 (setq ch (1+ ch)))
;; Third range: #xfe70 through #xfefe (#xfeff is deliberately or
;; incidentally left out -- the bound here is #xfefe, not #xfeff).
132 (setq ch #xfe70)
133 (while (<= ch #xfefe)
134 (modify-category-entry (decode-char 'ucs ch) ?b)
135 (setq ch (1+ ch))))
4ed46869
KH
136
137;; Chinese character set (GB2312)
138
269a5dd0 139;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
4ed46869
KH
140(modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
141(modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
142(modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
143(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
144(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
145(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
146(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
147(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
148(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
149(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
840f8f73
KH
150(modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
151(modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
152(modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
4ed46869
KH
153(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
154(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
155(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
156(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
157(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
158(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
159(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
840f8f73
KH
160(modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
161(modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
162(modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
4ed46869 163
840f8f73
KH
164(let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
165 (dotimes (i (length chars))
166 (modify-syntax-entry (aref chars i) ".")))
167
4ed46869
KH
168(modify-category-entry (make-char 'chinese-gb2312) ?c)
169(modify-category-entry (make-char 'chinese-gb2312) ?\|)
170(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
171(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
172(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
173(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
174(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
4ed46869
KH
175(let ((row 48))
176 (while (< row 127)
177 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
178 (setq row (1+ row))))
179
180;; Chinese character set (BIG5)
181
840f8f73
KH
;; Set the syntax of three runs of Big5 characters.  In every loop the
;; test is `(< from to)', so the character decoded from the second code
;; is NOT included: each run is [FROM, TO).
;;
;; Run 1: Big5 #xA141 up to, but not including, #xA15D -> punctuation.
182(let ((from (decode-big5-char #xA141))
183 (to (decode-big5-char #xA15D)))
184 (while (< from to)
185 (modify-syntax-entry from ".")
186 (setq from (1+ from))))
;; Run 2: Big5 #xA1A5 up to, but not including, #xA1AD -> punctuation.
187(let ((from (decode-big5-char #xA1A5))
188 (to (decode-big5-char #xA1AD)))
189 (while (< from to)
190 (modify-syntax-entry from ".")
191 (setq from (1+ from))))
;; Run 3: starts exactly where run 2 stopped (#xA1AD) and goes up to,
;; but not including, #xA2AF -> symbol constituent.
192(let ((from (decode-big5-char #xA1AD))
193 (to (decode-big5-char #xA2AF)))
194 (while (< from to)
195 (modify-syntax-entry from "_")
196 (setq from (1+ from))))
197
;; Declare matching open/close delimiter pairs.  `parens' is read two
;; characters at a time: the character at an even index is an opener and
;; the character right after it is its closer.  The pair count is
;; `(/ (length parens) 2)', so an unpaired trailing character (if the
;; string ever had odd length) would simply be ignored.
198(let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
199 open close)
200 (dotimes (i (/ (length parens) 2))
201 (setq open (aref parens (* i 2))
202 close (aref parens (1+ (* i 2))))
;; The descriptor is the class character followed by the matching
;; character: "(" + closer for the opener, ")" + opener for the closer.
203 (modify-syntax-entry open (format "(%c" close))
204 (modify-syntax-entry close (format ")%c" open))))
205
4ed46869
KH
206(let ((generic-big5-1-char (make-char 'chinese-big5-1))
207 (generic-big5-2-char (make-char 'chinese-big5-2)))
269a5dd0
DL
208;; (modify-syntax-entry generic-big5-1-char "w")
209;; (modify-syntax-entry generic-big5-2-char "w")
4ed46869
KH
210
211 (modify-category-entry generic-big5-1-char ?c)
212 (modify-category-entry generic-big5-2-char ?c)
213
214 (modify-category-entry generic-big5-1-char ?C)
215 (modify-category-entry generic-big5-2-char ?C)
216
217 (modify-category-entry generic-big5-1-char ?\|)
218 (modify-category-entry generic-big5-2-char ?\|))
219
220
221;; Chinese character set (CNS11643)
222
223(let ((cns-list '(chinese-cns11643-1
224 chinese-cns11643-2
225 chinese-cns11643-3
226 chinese-cns11643-4
227 chinese-cns11643-5
228 chinese-cns11643-6
229 chinese-cns11643-7))
230 generic-char)
231 (while cns-list
232 (setq generic-char (make-char (car cns-list)))
269a5dd0 233;; (modify-syntax-entry generic-char "w")
4ed46869
KH
234 (modify-category-entry generic-char ?c)
235 (modify-category-entry generic-char ?C)
236 (modify-category-entry generic-char ?|)
237 (setq cns-list (cdr cns-list))))
238
70abfe90
KH
;; Declare matching open/close delimiter pairs.  `parens' is consumed in
;; steps of two characters: even index = opener, following odd index =
;; its closer.  `(/ (length parens) 2)' pairs are processed.
239(let ((parens "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
240 open close)
241 (dotimes (i (/ (length parens) 2))
242 (setq open (aref parens (* i 2))
243 close (aref parens (1+ (* i 2))))
;; Opener gets class "(" with the closer as its match; the closer gets
;; class ")" with the opener as its match.
244 (modify-syntax-entry open (format "(%c" close))
245 (modify-syntax-entry close (format ")%c" open))))
246
4ed46869
KH
247;; Cyrillic character set (ISO-8859-5)
248
249(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
250
28636af6 251(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
4ed46869
KH
252(modify-syntax-entry ?\e,L-\e(B ".")
253(modify-syntax-entry ?\e,Lp\e(B ".")
254(modify-syntax-entry ?\e,L}\e(B ".")
28636af6
RS
255(let ((tbl (standard-case-table)))
256 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
257 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
258 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
259 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
260 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
261 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
262 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
263 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
264 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
265 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
266 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
267 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
268 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
269 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
270 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
271 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
272 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
273 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
274 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
275 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
276 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
277 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
278 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
279 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
280 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
281 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
282 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
283 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
284 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
285 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
286 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
287 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
288 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
289 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
290 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
291 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
292 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
293 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
294 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
295 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
296 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
297 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
298 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
299 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
300 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
269a5dd0
DL
301 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
303 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
305 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
306 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
307 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
308 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
309 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
310 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
314 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
315 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
316 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
317 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
318 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
319 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
320 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
321 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
322 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
323 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
331 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
332 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
346 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
347 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
4ed46869 348
9395eb7c
KH
349;; Devanagari character set
350
269a5dd0
DL
351;;; Commented out since the categories appear not to be used anywhere
352;;; and word syntax is the default.
353;; (let ((deflist '(;; chars syntax category
354;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
355;; ; chandrabindu, anuswar, visarga
356;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
357;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
358;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
359;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
360;; ;; Unicode equivalents
361;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
362;; ; chandrabindu, anuswar, visarga
363;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
364;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
365;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
366;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
367;; ))
368;; elm chars len syntax category to ch i)
369;; (while deflist
370;; (setq elm (car deflist))
371;; (setq chars (car elm)
372;; len (length chars)
373;; syntax (nth 1 elm)
374;; category (nth 2 elm)
375;; i 0)
376;; (while (< i len)
377;; (if (= (aref chars i) ?-)
378;; (setq i (1+ i)
379;; to (aref chars i))
380;; (setq ch (aref chars i)
381;; to ch))
382;; (while (<= ch to)
383;; (modify-syntax-entry ch syntax)
384;; (modify-category-entry ch category)
385;; (setq ch (1+ ch)))
386;; (setq i (1+ i)))
387;; (setq deflist (cdr deflist))))
9395eb7c 388
4ed46869
KH
389;; Ethiopic character set
390
391(modify-category-entry (make-char 'ethiopic) ?e)
269a5dd0
DL
392;; (modify-syntax-entry (make-char 'ethiopic) "w")
393(dotimes (i (1+ (- #x137c #x1200)))
394 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
395(let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
396 ;; Unicode equivalents of the above:
397 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
c23d3872
KH
398 (while chars
399 (modify-syntax-entry (car chars) ".")
400 (setq chars (cdr chars))))
4ed46869 401
4ed46869
KH
402;; Greek character set (ISO-8859-7)
403
404(modify-category-entry (make-char 'greek-iso8859-7) ?g)
269a5dd0
DL
405(let ((c #x370))
406 (while (<= c #x3ff)
407 (modify-category-entry (decode-char 'ucs c) ?g)
4ed46869 408 (setq c (1+ c))))
269a5dd0
DL
409
410;; (let ((c 182))
411;; (while (< c 255)
412;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
413;; (setq c (1+ c))))
414;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
4ed46869
KH
415(modify-syntax-entry ?\e,F7\e(B ".")
416(modify-syntax-entry ?\e,F;\e(B ".")
417(modify-syntax-entry ?\e,F=\e(B ".")
4b7c7a40 418(let ((tbl (standard-case-table)))
269a5dd0
DL
419 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
420 ;; in several cases.
421 (set-case-syntax ?\e,F!\e(B "." tbl)
422 (set-case-syntax ?\e,F"\e(B "." tbl)
423 (set-case-syntax ?\e,F&\e(B "." tbl)
424 (set-case-syntax ?\e,F&\e(B "_" tbl)
425 (set-case-syntax ?\e,F'\e(B "." tbl)
426 (set-case-syntax ?\e,F)\e(B "_" tbl)
427 (set-case-syntax ?\e,F+\e(B "." tbl)
428 (set-case-syntax ?\e,F,\e(B "_" tbl)
429 (set-case-syntax ?\e,F-\e(B "_" tbl)
430 (set-case-syntax ?\e,F/\e(B "." tbl)
431 (set-case-syntax ?\e,F0\e(B "_" tbl)
432 (set-case-syntax ?\e,F1\e(B "_" tbl)
433;; (set-case-syntax ?\e,F7\e(B "_" tbl)
434;; (set-case-syntax ?\e,F=\e(B "_" tbl)
4b7c7a40
DL
435 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
436 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
437 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
438 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
439 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
440 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
441 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
442 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
443 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
444 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
445 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
446 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
447 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
448 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
449 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
450 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
451 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
e0a65b4d 452 (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
4b7c7a40
DL
453 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
454 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
455 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
456 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
457 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
458 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
459 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
460 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
461 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
462 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
463 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
464 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
465 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
466 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
467 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
269a5dd0
DL
468 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
469 ;; Unicode equivalents
470 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
471 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
472 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
473 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
474 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
475 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
476 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
477 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
478 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
479 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
480 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
481 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
482 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
483 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
484 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
485 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
486 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
e0a65b4d 487 (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
269a5dd0
DL
488 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
489 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
490 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
491 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
492 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
493 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
494 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
495 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
496 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
500 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
501 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
502 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
503 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
4ed46869
KH
504
505;; Hebrew character set (ISO-8859-8)
506
227f528e
EZ
507(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
508(let ((c #x591))
509 (while (<= c #x5f4)
510 (modify-category-entry (decode-char 'ucs c) ?w)
511 (setq c (1+ c))))
512
513(modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
514(modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
515(modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
516(modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
517(modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
518(modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
519(modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
4ed46869 520
269a5dd0
DL
521;; (let ((c 224))
522;; (while (< c 251)
523;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
524;; (setq c (1+ c))))
525;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
4ed46869
KH
526
527;; Indian character set (IS 13194 and other Emacs original Indian charsets)
528
529(modify-category-entry (make-char 'indian-is13194) ?i)
530(modify-category-entry (make-char 'indian-2-column) ?I)
69e138b2 531(modify-category-entry (make-char 'indian-glyph) ?I)
269a5dd0
DL
532;; Unicode Devanagari block
533(let ((c #x901))
534 (while (<= c #x970)
535 (modify-category-entry (decode-char 'ucs c) ?i)
536 (setq c (1+ c))))
4ed46869 537
ed459fb6
KH
;; Assign phonetic categories to UCS code points using one shared table.
;; Each entry of `l' is (FIRST LAST CATEGORY), where FIRST and LAST are
;; offsets that get added to a base value; the range is inclusive
;; (`counts' is LAST - FIRST + 1).  The same table is applied once for
;; every base value in the outer `dolist', i.e. for #x900, #x980, ...,
;; #xd00.
538(let ((l '(;; RANGE CATEGORY MEANINGS
539 (#x01 #x03 ?7) ; vowel modifier
540 (#x05 #x14 ?1) ; base vowel
541 (#x15 #x39 ?0) ; consonants
542 (#x3e #x4d ?8) ; vowel signs
543 (#x51 #x54 ?4) ; stress/tone mark
544 (#x58 #x5f ?0) ; consonants
545 (#x60 #x61 ?1) ; base vowel
546 (#x62 #x63 ?8) ; vowel signs
547 (#x66 #x6f ?6) ; digits
548 )))
;; elt1 is the base value, elt2 one (FIRST LAST CATEGORY) entry.
549 (dolist (elt1 '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
550 (dolist (elt2 l)
551 (let* ((from (car elt2))
552 (counts (1+ (- (nth 1 elt2) from)))
553 (category (nth 2 elt2)))
;; Visit base + FIRST, base + FIRST + 1, ..., base + LAST.
554 (dotimes (i counts)
555 (modify-category-entry (decode-char 'ucs (+ elt1 from i))
556 category))))))
4a027a0d 557
4ed46869
KH
558;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
559
560(modify-category-entry (make-char 'katakana-jisx0201) ?k)
226e4119 561(modify-category-entry (make-char 'katakana-jisx0201) ?j)
4ed46869
KH
562(modify-category-entry (make-char 'latin-jisx0201) ?r)
563(modify-category-entry (make-char 'japanese-jisx0208) ?j)
564(modify-category-entry (make-char 'japanese-jisx0212) ?j)
9395eb7c 565(modify-category-entry (make-char 'katakana-jisx0201) ?\|)
4ed46869 566(modify-category-entry (make-char 'japanese-jisx0208) ?\|)
9395eb7c 567(modify-category-entry (make-char 'japanese-jisx0212) ?\|)
4ed46869 568
269a5dd0
DL
569;; Unicode equivalents of JISX0201-kana
570(let ((c #xff61))
571 (while (<= c #xff9f)
572 (modify-category-entry (decode-char 'ucs c) ?k)
573 (modify-category-entry (decode-char 'ucs c) ?j)
574 (modify-category-entry (decode-char 'ucs c) ?\|)
575 (setq c (1+ c))))
576
577;; Katakana block
578(let ((c #x30a0))
579 (while (<= c #x30ff)
580 ;; ?K is double width, ?k isn't specified
581 (modify-category-entry (decode-char 'ucs c) ?k)
582 (modify-category-entry (decode-char 'ucs c) ?j)
a1506d29 583 (modify-category-entry (decode-char 'ucs c) ?\|)
269a5dd0
DL
584 (setq c (1+ c))))
585
586;; Hiragana block
587(let ((c #x3040))
588 (while (<= c #x309f)
589 ;; ?H is actually defined to be double width
590 (modify-category-entry (decode-char 'ucs c) ?H)
591 ;;(modify-category-entry (decode-char 'ucs c) ?j)
a1506d29 592 (modify-category-entry (decode-char 'ucs c) ?\|)
269a5dd0
DL
593 (setq c (1+ c))))
594
4ed46869 595;; JISX0208
269a5dd0 596;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
4ed46869
KH
597(modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
598(modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
599(modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
cf6af551
MB
600(let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
601 (while chars
602 (modify-syntax-entry (car chars) "w")
603 (setq chars (cdr chars))))
70abfe90
KH
604(let ((parens "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
605 open close)
606 (dotimes (i (/ (length parens) 2))
607 (setq open (aref parens (* i 2))
608 close (aref parens (1+ (* i 2))))
609 (modify-syntax-entry open (format "(%c" close))
610 (modify-syntax-entry close (format ")%c" open))))
4ed46869
KH
611
612(modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
613(modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
614(modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
615(modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
616(modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
617(let ((row 48))
618 (while (< row 127)
619 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
620 (setq row (1+ row))))
210dede6
KH
621(modify-category-entry ?\e$B!<\e(B ?K)
622(let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
4ed46869
KH
623 (while chars
624 (modify-category-entry (car chars) ?K)
625 (modify-category-entry (car chars) ?H)
626 (setq chars (cdr chars))))
627(let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
628 (while chars
629 (modify-category-entry (car chars) ?C)
630 (setq chars (cdr chars))))
631
632;; JISX0212
269a5dd0 633;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
4ed46869
KH
634(modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
635(modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
636(modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
637
638(modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
639
640;; JISX0201-Kana
269a5dd0
DL
641;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
642(let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
643 ;; Unicode:
644 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
4ed46869
KH
645 (while chars
646 (modify-syntax-entry (car chars) ".")
647 (setq chars (cdr chars))))
648
226e4119
KH
;; Bracket pair of JISX0201-Kana.  The first character is the opener and
;; names the second as its match; the second is the closer and therefore
;; takes the `)' class with the opener as its match, following the same
;; "(" / ")" convention as the GB2312 pairs earlier in this file.
649(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
650(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
651
4ed46869
KH
652;; Korean character set (KSC5601)
653
269a5dd0 654;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
4ed46869
KH
655(modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
656(modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
657(modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
658(modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
659(modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
660(modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
661
662(modify-category-entry (make-char 'korean-ksc5601) ?h)
663(modify-category-entry (make-char 'korean-ksc5601 35) ?A)
664(modify-category-entry (make-char 'korean-ksc5601 37) ?G)
665(modify-category-entry (make-char 'korean-ksc5601 42) ?H)
666(modify-category-entry (make-char 'korean-ksc5601 43) ?K)
667(modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
668
013d402e 669(let ((parens "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
70abfe90
KH
670 open close)
671 (dotimes (i (/ (length parens) 2))
672 (setq open (aref parens (* i 2))
673 close (aref parens (1+ (* i 2))))
674 (modify-syntax-entry open (format "(%c" close))
675 (modify-syntax-entry close (format ")%c" open))))
676
d05cfa1f
KH
677;; Latin character set (latin-1,2,3,4,5,8,9)
678
679(modify-category-entry (make-char 'latin-iso8859-1) ?l)
680(modify-category-entry (make-char 'latin-iso8859-2) ?l)
681(modify-category-entry (make-char 'latin-iso8859-3) ?l)
682(modify-category-entry (make-char 'latin-iso8859-4) ?l)
683(modify-category-entry (make-char 'latin-iso8859-9) ?l)
684(modify-category-entry (make-char 'latin-iso8859-14) ?l)
685(modify-category-entry (make-char 'latin-iso8859-15) ?l)
686
687(modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
688(modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
689(modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
690(modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
691(modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
692(modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
693(modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
694
6eba8645
KH
695;; Lao character set
696
697(modify-category-entry (make-char 'lao) ?o)
269a5dd0
DL
698(dotimes (i (1+ (- #xeff #xe80)))
699 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
6eba8645
KH
700
;; Table-driven setup of syntax and category for Lao characters.
;; Each entry of `deflist' is (CHARS SYNTAX CATEGORY).  CHARS is a string
;; that lists individual characters; a `-' between two characters means
;; "every character code from the one before the dash through the one
;; after it".  CATEGORY is always applied; SYNTAX is applied only when it
;; is not "w" (see the `unless' in the inner loop).
701(let ((deflist '(;; chars syntax category
702 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
703 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
704 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
705 ("\e(1XY\e(B" "w" ?3) ; vowel lower
a1506d29 706 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
6eba8645 707 ("\e(1\\e(B" "w" ?9) ; semivowel lower
9765a2ba 708 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
d5f05a67 709 ("\e(1Of\e(B" "_" ?5) ; symbol
269a5dd0
DL
710 ;; Unicode equivalents
711 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
712 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
713 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
714 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
a1506d29 715 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
269a5dd0
DL
716 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
717 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
718 ("\e$,1DODf\e(B" "_" ?5) ; symbol
6eba8645
KH
719 ))
720 elm chars len syntax category to ch i)
;; Outer loop: one iteration per table entry.
721 (while deflist
722 (setq elm (car deflist))
723 (setq chars (car elm)
724 len (length chars)
725 syntax (nth 1 elm)
726 category (nth 2 elm)
727 i 0)
;; Middle loop: scan the CHARS string position by position.
728 (while (< i len)
;; On a `-', skip it and take the next character as the upper bound
;; `to'.  `ch' is left alone: after the previous pass of the inner loop
;; it is already one past the character that preceded the dash.
;; Otherwise the current character is both the start and the end.
729 (if (= (aref chars i) ?-)
730 (setq i (1+ i)
4a027a0d
KH
731 to (aref chars i))
732 (setq ch (aref chars i)
6eba8645
KH
733 to ch))
;; Inner loop: apply the entry to every code from `ch' through `to'.
734 (while (<= ch to)
269a5dd0
DL
;; "w" entries do not modify the syntax table; only their category
;; is recorded.
735 (unless (string-equal syntax "w")
736 (modify-syntax-entry ch syntax))
6eba8645
KH
737 (modify-category-entry ch category)
738 (setq ch (1+ ch)))
4a027a0d 739 (setq i (1+ i)))
6eba8645
KH
740 (setq deflist (cdr deflist))))
741
4ed46869
KH
742;; Thai character set (TIS620)
743
;; Category `t' for the `thai-tis620' charset and for the Unicode code
;; points U+0E00 through U+0E7F.
(modify-category-entry (make-char 'thai-tis620) ?t)
(let ((code #xe00))
  (while (<= code #xe7f)
    (modify-category-entry (decode-char 'ucs code) ?t)
    (setq code (1+ code))))
4ed46869
KH
747
;; Assign categories (and, where needed, syntax) to individual Thai
;; characters.
;;
;; Each element of DEFLIST is (CHARS SYNTAX CATEGORY).  CHARS is a string
;; of characters in which "X-Y" stands for the inclusive range X..Y.
;; Every character named by CHARS gets CATEGORY; its syntax entry is
;; changed only when SYNTAX is something other than "w".
;;
;; Range handling: a character is processed as soon as it is read, which
;; leaves CH one past it.  When a `-' is then seen, only TO is updated
;; (to the character following the `-'), so the inner loop continues
;; from CH up to TO.  A `-' must therefore always be followed by another
;; character in CHARS.
748(let ((deflist '(;; chars syntax category
749 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
750 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
751 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
752 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
a1506d29 753 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
9765a2ba
KH
754 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
755 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
269a5dd0
DL
756 ;; Unicode equivalents
757 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
758 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
759 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
760 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
a1506d29 761 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
269a5dd0
DL
762 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
763 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
4ed46869
KH
764 ))
765 elm chars len syntax category to ch i)
9395eb7c
KH
766 (while deflist
767 (setq elm (car deflist))
768 (setq chars (car elm)
769 len (length chars)
770 syntax (nth 1 elm)
771 category (nth 2 elm)
772 i 0)
773 (while (< i len)
774 (if (= (aref chars i) ?-)
775 (setq i (1+ i)
4a027a0d
KH
776 to (aref chars i))
777 (setq ch (aref chars i)
9395eb7c
KH
778 to ch))
779 (while (<= ch to)
269a5dd0
DL
780 (unless (string-equal syntax "w")
781 (modify-syntax-entry ch syntax))
9395eb7c
KH
782 (modify-category-entry ch category)
783 (setq ch (1+ ch)))
4a027a0d 784 (setq i (1+ i)))
9395eb7c
KH
785 (setq deflist (cdr deflist))))
786
787;; Tibetan character set
788
16230888
KH
;; Category `q' for both Tibetan charsets and for the Unicode code
;; points U+0F00 through U+0FFF.
(dolist (charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char charset) ?q))
(let ((code #xf00))
  (while (<= code #xfff)
    (modify-category-entry (decode-char 'ucs code) ?q)
    (setq code (1+ code))))
9395eb7c
KH
793
;; Assign categories (and, where needed, syntax) to individual Tibetan
;; characters.
;;
;; Each element of DEFLIST is (CHARS SYNTAX CATEGORY).  CHARS is a string
;; of characters in which "X-Y" stands for the inclusive range X..Y.
;; Every character named by CHARS gets CATEGORY; its syntax entry is
;; changed only when SYNTAX is something other than "w" (here that means
;; the "." entries).
;;
;; Range handling: a character is processed as soon as it is read, which
;; leaves CH one past it.  When a `-' is then seen, only TO is updated
;; (to the character following the `-'), so the inner loop continues
;; from CH up to TO.  A `-' must therefore always be followed by another
;; character in CHARS.
794(let ((deflist '(;; chars syntax category
269a5dd0 795 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
e6f02372 796 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
9395eb7c
KH
797 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
798 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
799 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
800 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
e6f02372 801 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
9395eb7c
KH
802 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
803 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
804 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
805 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
806 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
807 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
e6f02372 808 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
269a5dd0
DL
809
810 ;; Unicode version (not complete)
811 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
812 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
813 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
814 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
815 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
816 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
817 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
818 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
819 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
820 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
9395eb7c
KH
821 ))
822 elm chars len syntax category to ch i)
4ed46869
KH
823 (while deflist
824 (setq elm (car deflist))
825 (setq chars (car elm)
826 len (length chars)
827 syntax (nth 1 elm)
828 category (nth 2 elm)
829 i 0)
830 (while (< i len)
831 (if (= (aref chars i) ?-)
832 (setq i (1+ i)
4a027a0d
KH
833 to (aref chars i))
834 (setq ch (aref chars i)
4ed46869
KH
835 to ch))
836 (while (<= ch to)
269a5dd0
DL
837 (unless (string-equal syntax "w")
838 (modify-syntax-entry ch syntax))
4ed46869
KH
839 (modify-category-entry ch category)
840 (setq ch (1+ ch)))
4a027a0d 841 (setq i (1+ i)))
4ed46869
KH
842 (setq deflist (cdr deflist))))
843
844;; Vietnamese character set
845
;; Both VISCII charsets get category `v', and additionally category `l'
;; so that they can form a word together with Latin characters.
;; Syntax is not touched here (the modify-syntax-entry calls that used
;; to set word syntax are disabled).
(dolist (category '(?v ?l))
  (dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
    (modify-category-entry (make-char charset) category)))
855
e5dd1155
KH
;; Pair up the two VISCII charsets in the standard case table: for every
;; code 32..127 the `vietnamese-viscii-upper' character is the uppercase
;; partner of the `vietnamese-viscii-lower' character with the same code.
(let ((case-tbl (standard-case-table)))
  (dotimes (offset 96)
    (let ((code (+ 32 offset)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            case-tbl))))
863
d05cfa1f
KH
864;; Unicode (mule-unicode-0100-24ff)
865
85ef8ece
KH
866(let ((tbl (standard-case-table)) c)
867
868;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
869;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
870;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
871;; Thus we have to check language-environment to handle casing
872;; correctly. Currently only I<->i is available.
873
85ef8ece
KH
;; Latin Extended-A, Latin Extended-B (U+0100..U+0233).
;; Every code point gets category `l'.  Case pairs that follow a regular
;; pattern are set here; the remaining ones are listed individually
;; after this loop.
(setq c #x0100)
(while (<= c #x0233)
  (modify-category-entry (decode-char 'ucs c) ?l)
  (when (= (% c 2) 0)
    (cond ((or (<= c #x012e)
               (and (>= c #x014a) (<= c #x0177)))
           ;; Uppercase on the even code point, lowercase on the next one.
           (set-case-syntax-pair
            (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
          ((and (>= c #x013a) (<= c #x0148))
           ;; Here the order is reversed: uppercase on the odd code
           ;; point just before the even one.
           (set-case-syntax-pair
            (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))))
  (setq c (1+ c)))
946ce785
KH
;; Case entries set one by one rather than by the preceding loop.
;; Characters are written as ISO-2022 escape sequences because this
;; file is saved in `iso-2022-7bit'.
;; The first two calls use the one-directional `set-downcase-syntax' and
;; `set-upcase-syntax' (involving ASCII `i' and `I') instead of
;; `set-case-syntax-pair'; see the comment about dotted/dotless I at the
;; top of this form.  The remaining calls pass the uppercase character
;; first and the lowercase character second.
889 (set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
890 (set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
85ef8ece
KH
891 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
892 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
893 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
e0a65b4d 894 (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
d05cfa1f
KH
895 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
898
269a5dd0
DL
899 ;; Latin Extended-B
;; Individually listed case pairs, registered in TBL.  Each call passes
;; the uppercase character first and the lowercase character second.
;; Characters are written as ISO-2022 escape sequences because this
;; file is saved in `iso-2022-7bit'.
;; Some lowercase characters appear in two consecutive calls (with two
;; different first arguments); the later call is the one that determines
;; the lowercase character's uppercase mapping.
900 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
913 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
917 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
918 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
922 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
924 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
925 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
926 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
927 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
928 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
929 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
930 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
931 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
932 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
933 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
934 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
935 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
949 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
950 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
959 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
960 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
974 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
975 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
976 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
977 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
978 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
979 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
980 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
981 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
982 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
983 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
984 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
985 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
986 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
987 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
988 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
989 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
990 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
991 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
992 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
993 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
994
85ef8ece
KH
;; Latin Extended Additional (U+1E00..U+1EF9).
;; Every code point gets category `l'.  Outside the gap U+1E95..U+1E9F,
;; each even code point is the uppercase partner of the next one.
(setq c #x1e00)
(while (<= c #x1ef9)
  (modify-category-entry (decode-char 'ucs c) ?l)
  (when (and (= (% c 2) 0)
             (not (and (> c #x1e94) (< c #x1ea0))))
    (set-case-syntax-pair
     (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
  (setq c (1+ c)))
1004
85ef8ece
KH
;; Greek (U+0370..U+03FF).
;; Every code point gets category `g'.
(setq c #x0370)
(while (<= c #x03ff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (cond ((and (>= c #x0391) (<= c #x03ab) (/= c #x03a2))
         ;; U+0391..U+03AB except U+03A2: lowercase is 32 above.
         (set-case-syntax-pair
          (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
        ((and (>= c #x03da) (<= c #x03ee) (= (% c 2) 0))
         ;; Even code points in U+03DA..U+03EE: lowercase is the next one.
         (set-case-syntax-pair
          (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
;; Greek case pairs that do not fit the offsets used by the loop above,
;; listed individually (uppercase first, lowercase second).  Characters
;; are written as ISO-2022 escape sequences because this file is saved
;; in `iso-2022-7bit'.
1019 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1026
269a5dd0
DL
;; Armenian: each of U+0531..U+0556 is the uppercase partner of the
;; code point #x30 above it.
(setq c #x531)
(while (< c #x557)
  (let ((upper (decode-char 'ucs c))
        (lower (decode-char 'ucs (+ c #x30))))
    (set-case-syntax-pair upper lower tbl))
  (setq c (1+ c)))
1033
85ef8ece
KH
;; Greek Extended (U+1F00..U+1FFF).
;; Every code point gets category `g'.  In the columns up to U+1FA7 the
;; low half of each row of sixteen (low nibble 0..7) holds lowercase
;; letters whose uppercase partner is 8 above, with these exceptions:
;;   - U+1F50, U+1F52, U+1F54 and U+1F56 are skipped;
;;   - the row U+1F70..U+1F77 is skipped: its uppercase partners are not
;;     at +8 and are set by the individual `set-case-syntax-pair' calls
;;     that follow this loop.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; The masked value is always a multiple of 16, so it has to be
       ;; compared against #x70 (not 7) for this test to exclude the
       ;; U+1F7x row; compared against 7 it could never fail.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
        (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
;; Greek Extended case pairs whose partners are not 8 code points
;; apart in the way the loop above assumes, listed individually
;; (uppercase first, lowercase second).  Characters are written as
;; ISO-2022 escape sequences because this file is saved in
;; `iso-2022-7bit'.
1045 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1058 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1059 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1063 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1064 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1065 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1066 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1067 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1068 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1069
85ef8ece
KH
;; Cyrillic (U+0400..U+04FF).
;; Every code point gets category `y'.  The three case-pair rules below
;; cover disjoint ranges, so at most one applies to a given code point.
(setq c #x0400)
(while (<= c #x04ff)
  (modify-category-entry (decode-char 'ucs c) ?y)
  (cond ((<= c #x040f)
         ;; U+0400..U+040F: lowercase is 80 above.
         (set-case-syntax-pair
          (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
        ((<= c #x042f)
         ;; U+0410..U+042F: lowercase is 32 above.
         (set-case-syntax-pair
          (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
        ((and (= (% c 2) 0)
              (or (and (>= c #x0460) (<= c #x0480))
                  (and (>= c #x048c) (<= c #x04be))
                  (and (>= c #x04d0) (<= c #x04f4))))
         ;; Even code points in these ranges: lowercase is the next one.
         (set-case-syntax-pair
          (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
;; Cyrillic case pairs not covered by the ranges in the loop above,
;; listed individually (uppercase first, lowercase second).  Characters
;; are written as ISO-2022 escape sequences because this file is saved
;; in `iso-2022-7bit'.
1089 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1090 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1091 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1092 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1093 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1094
85ef8ece
KH
;; General punctuation.
;; Each entry is (FIRST LAST SYNTAX): every code point from FIRST through
;; LAST is passed to `set-case-syntax' with SYNTAX.  U+200C..U+200F are
;; deliberately not listed.
(dolist (range '((#x2000 #x200b " ")
                 (#x2010 #x2016 "_")
                 ;; Punctuation syntax for quotation marks (like `)
                 (#x2017 #x201f ".")
                 (#x2020 #x2027 "_")))
  (setq c (car range))
  (while (<= c (nth 1 range))
    (set-case-syntax (decode-char 'ucs c) (nth 2 range) tbl)
    (setq c (1+ c))))
d05cfa1f 1111
269a5dd0
DL
;; Roman numerals: each of U+2160..U+216F is the uppercase partner of
;; the code point #x10 above it.
(dotimes (n 16)
  (let ((upper (decode-char 'ucs (+ #x2160 n)))
        (lower (decode-char 'ucs (+ #x2170 n))))
    (set-case-syntax-pair upper lower tbl)))
1118
;; Circled Latin: each of U+24B6..U+24CF is the uppercase partner of the
;; code point 26 above it (U+24D0..U+24E9).  Both get category `l'.
(dotimes (n 26)
  (let ((upper (decode-char 'ucs (+ #x24b6 n)))
        (lower (decode-char 'ucs (+ #x24d0 n))))
    (set-case-syntax-pair upper lower tbl)
    (modify-category-entry upper ?l)
    (modify-category-entry lower ?l)))
1127
;; Fullwidth Latin: each of U+FF21..U+FF3A is the uppercase partner of
;; the code point #x20 above it (U+FF41..U+FF5A).  Both get category `l'.
(dotimes (n 26)
  (let ((upper (decode-char 'ucs (+ #xff21 n)))
        (lower (decode-char 'ucs (+ #xff41 n))))
    (set-case-syntax-pair upper lower tbl)
    (modify-category-entry upper ?l)
    (modify-category-entry lower ?l)))
1136
269a5dd0
DL
;; Combining diacritics (U+0300..U+0362) and combining marks
;; (U+20D0..U+20E3) all get category `^'.
(dolist (range '((#x300 . #x362)
                 (#x20d0 . #x20e3)))
  (setq c (car range))
  (while (<= c (cdr range))
    (modify-category-entry (decode-char 'ucs c) ?^)
    (setq c (1+ c))))
1148
1149 ;; Fixme: syntax for symbols &c
1150 )
d6af0bff
KH
1151
;; Give parenthesis syntax to paired Unicode brackets.
;; Each element of PAIRS is a two-character string: the opening bracket
;; followed by its closing bracket (the code points are noted in the
;; comment on each line).  The opener gets syntax "(" with the closer as
;; its matching character, and the closer gets syntax ")" with the
;; opener as its matching character.  Characters are written as ISO-2022
;; escape sequences because this file is saved in `iso-2022-7bit'.
1152(let ((pairs
1153 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1154 "\e$,1s}s~\e(B" ; U+207D U+207E
1155 "\e$,1t-t.\e(B" ; U+208D U+208E
d6af0bff
KH
1156 "\e$,1{){*\e(B" ; U+2329 U+232A
1157 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1158 "\e$,2&H&I\e(B" ; U+2768 U+2769
1159 "\e$,2&J&K\e(B" ; U+276A U+276B
1160 "\e$,2&L&M\e(B" ; U+276C U+276D
1161 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1162 "\e$,2&R&S\e(B" ; U+2772 U+2773
1163 "\e$,2&T&U\e(B" ; U+2774 U+2775
1164 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1165 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1166 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1167 "\e$,2,#,$\e(B" ; U+2983 U+2984
1168 "\e$,2,%,&\e(B" ; U+2985 U+2986
1169 "\e$,2,',(\e(B" ; U+2987 U+2988
1170 "\e$,2,),*\e(B" ; U+2989 U+298A
1171 "\e$,2,+,,\e(B" ; U+298B U+298C
1172 "\e$,2,-,.\e(B" ; U+298D U+298E
1173 "\e$,2,/,0\e(B" ; U+298F U+2990
1174 "\e$,2,1,2\e(B" ; U+2991 U+2992
1175 "\e$,2,3,4\e(B" ; U+2993 U+2994
1176 "\e$,2,5,6\e(B" ; U+2995 U+2996
1177 "\e$,2,7,8\e(B" ; U+2997 U+2998
1178 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1179 "\e$,2=H=I\e(B" ; U+3008 U+3009
1180 "\e$,2=J=K\e(B" ; U+300A U+300B
1181 "\e$,2=L=M\e(B" ; U+300C U+300D
1182 "\e$,2=N=O\e(B" ; U+300E U+300F
1183 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1184 "\e$,2=T=U\e(B" ; U+3014 U+3015
1185 "\e$,2=V=W\e(B" ; U+3016 U+3017
1186 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1187 "\e$,2=Z=[\e(B" ; U+301A U+301B
1188 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1189 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1190 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1191 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1192 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1193 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1194 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1195 "\e$,3papb\e(B" ; U+FE41 U+FE42
1196 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1197 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1198 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1199 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1200 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1201 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1202 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1203 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1204 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1205 )))
1206 (dolist (elt pairs)
1207 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
1208 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
1209
4ed46869
KH
1210\f
1211;;; Setting word boundary.
1212
;; The only combining rule: category `l' followed by category `l'.
(setq word-combining-categories
      (list (cons ?l ?l)))
1215
;; Word-separating category pairs (2-byte character sets).
;; Each two-letter string below is turned into a (FIRST . SECOND) cons of
;; category mnemonics, where
;;   A = Alpha numeric, H = Hiragana, K = Katakana, C = Chinese.
(setq word-separating-categories
      (mapcar (lambda (pair)
                (cons (aref pair 0) (aref pair 1)))
              '("AK"                    ; Alpha numeric - Katakana
                "AC"                    ; Alpha numeric - Chinese
                "HA"                    ; Hiragana - Alpha numeric
                "HK"                    ; Hiragana - Katakana
                "HC"                    ; Hiragana - Chinese
                "KA"                    ; Katakana - Alpha numeric
                "KC"                    ; Katakana - Chinese
                "CA"                    ; Chinese - Alpha numeric
                "CK"                    ; Chinese - Katakana
                )))
777cfce6
KH
1227
1228\f
1229;; For each character set, put the information of the most proper
aaa9f206 1230;; coding system to encode it by `preferred-coding-system' property.
777cfce6
KH
1231
;; Each entry is (CHARSET . CODING-SYSTEM); CODING-SYSTEM is stored as
;; CHARSET's `preferred-coding-system' property.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
df0415c5
KH
1278
1279\f
98a663f1 1280;; Setup auto-fill-chars for charsets that should invoke auto-filling.
269a5dd0
DL
1281;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1282;; property on the charsets.
df0415c5
KH
;; For each listed charset: mark it in `auto-fill-chars' and set its
;; `nospace-between-words' property to t.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
777cfce6 1290
ae0916f8
KH
1291\f
;; Loading is finished: put back the `utf-translate-cjk-mode' value that
;; was saved (before being forced to nil) at the top of this file, then
;; remove the temporary variable.
(let ((stash 'saved-utf-translate-cjk-mode))
  (setq utf-translate-cjk-mode (symbol-value stash))
  (makunbound stash))
1294
777cfce6
KH
1295;;; Local Variables:
1296;;; coding: iso-2022-7bit
1297;;; End:
1298
ab5796a9 1299;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
60370d40 1300;;; characters.el ends here