Trailing whitespace deleted.
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
28636af6 3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
cf6af551 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4ed46869
KH
6
7;; Keywords: multibyte character, character set, syntax, category
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
369314dc
KH
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
4ed46869
KH
25
26;;; Commentary:
27
28;; This file contains multibyte characters. Save this file always in
4b16fa0c 29;; the coding system `iso-2022-7bit'.
4ed46869 30
49adf443
RS
31;; This file does not define the syntax for Latin-N character sets;
32;; those are defined by the files latin-N.el.
33
60370d40
PJ
34;;; Code:
35
4ed46869
KH
36;;; Predefined categories.
37
38;; For each character set.
39
40(define-category ?a "ASCII")
41(define-category ?l "Latin")
42(define-category ?t "Thai")
43(define-category ?g "Greek")
44(define-category ?b "Arabic")
45(define-category ?w "Hebrew")
46(define-category ?y "Cyrillic")
47(define-category ?k "Japanese katakana")
48(define-category ?r "Japanese roman")
49(define-category ?c "Chinese")
50(define-category ?j "Japanese")
51(define-category ?h "Korean")
52(define-category ?e "Ethiopic (Ge'ez)")
53(define-category ?v "Vietnamese")
54(define-category ?i "Indian")
6eba8645 55(define-category ?o "Lao")
9395eb7c 56(define-category ?q "Tibetan")
4ed46869
KH
57
58;; For each group (row) of 2-byte character sets.
59
94487c4e 60(define-category ?A "Alpha-numeric characters of 2-byte character sets")
4ed46869 61(define-category ?C "Chinese (Han) characters of 2-byte character sets")
94487c4e 62(define-category ?G "Greek characters of 2-byte character sets")
4ed46869
KH
63(define-category ?H "Japanese Hiragana characters of 2-byte character sets")
64(define-category ?K "Japanese Katakana characters of 2-byte character sets")
65(define-category ?N "Korean Hangul characters of 2-byte character sets")
94487c4e 66(define-category ?Y "Cyrillic characters of 2-byte character sets")
4ed46869
KH
67(define-category ?I "Indian Glyphs")
68
69;; For phonetic classifications.
70
71(define-category ?0 "consonant")
9765a2ba 72(define-category ?1 "base (independent) vowel")
4ed46869
KH
73(define-category ?2 "upper diacritical mark (including upper vowel)")
74(define-category ?3 "lower diacritical mark (including lower vowel)")
75(define-category ?4 "tone mark")
9765a2ba 76(define-category ?5 "symbol")
4ed46869
KH
77(define-category ?6 "digit")
78(define-category ?7 "vowel-modifying diacritical mark")
6eba8645
KH
79(define-category ?8 "vowel-signs")
80(define-category ?9 "semivowel lower")
4ed46869
KH
81
82;; For filling.
83(define-category ?| "While filling, we can break a line at this character.")
84
504af7b2 85;; For indentation calculation.
a1506d29 86(define-category ?
777cfce6 87 "This character counts as a space for indentation purposes.")
504af7b2 88
94487c4e 89;; Keep the following for `kinsoku' processing. See comments in
4ed46869
KH
90;; kinsoku.el.
91(define-category ?> "A character which can't be placed at beginning of line.")
92(define-category ?< "A character which can't be placed at end of line.")
93
269a5dd0
DL
94;; Combining
95(define-category ?^ "Combining diacritic or mark")
4ed46869
KH
96\f
97;;; Setting syntax and category.
98
99;; ASCII
100
;; Character codes 32 through 126 are placed in both category `a'
;; (ASCII) and category `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((code (+ 32 offset)))
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)))
106
107;; Arabic character set
108
;; Put the generic character of each legacy Arabic charset in
;; category `b'.  No syntax entry is set here; the former
;; `modify-syntax-entry' call to "w" stays disabled.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (modify-category-entry (make-char charset) ?b))
269a5dd0
DL
;; Unicode code points in the inclusive ranges U+0600..U+06FF,
;; U+FB50..U+FDFF and U+FE70..U+FEFE all get category `b'.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (end (cdr range)))
    (while (<= code end)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
4ed46869
KH
129
130;; Chinese character set (GB2312)
131
269a5dd0 132;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
4ed46869
KH
133(modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
134(modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
135(modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
136(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
137(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
138(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
139(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
140(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
141(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
142(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
143(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
144(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
145(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
146(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
147(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
148(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
149(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
269a5dd0
DL
150;; Unicode equivalents of above
151(modify-syntax-entry ?\\e$,2=T\e(B "(\e$,2=U\e(B")
152(modify-syntax-entry ?\\e$,2=H\e(B "(\e$,2=I\e(B")
153(modify-syntax-entry ?\\e$,2=J\e(B "(\e$,2=K\e(B")
154(modify-syntax-entry ?\\e$,2=L\e(B "(\e$,2=M\e(B")
155(modify-syntax-entry ?\\e$,2=N\e(B "(\e$,2=O\e(B")
156(modify-syntax-entry ?\\e$,2=V\e(B "(\e$,2=W\e(B")
157(modify-syntax-entry ?\\e$,2=P\e(B "(\e$,2=Q\e(B")
158(modify-syntax-entry ?\\e$,2=U\e(B ")\e$,2=T\e(B")
159(modify-syntax-entry ?\\e$,2=I\e(B ")\e$,2=H\e(B")
160(modify-syntax-entry ?\\e$,2=K\e(B ")\e$,2=J\e(B")
161(modify-syntax-entry ?\\e$,2=M\e(B ")\e$,2=L\e(B")
162(modify-syntax-entry ?\\e$,2=O\e(B ")\e$,2=N\e(B")
163(modify-syntax-entry ?\\e$,2=W\e(B ")\e$,2=V\e(B")
164(modify-syntax-entry ?\\e$,2=Q\e(B ")\e$,2=P\e(B")
4ed46869
KH
165
166(modify-category-entry (make-char 'chinese-gb2312) ?c)
167(modify-category-entry (make-char 'chinese-gb2312) ?\|)
168(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
169(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
170(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
171(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
172(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
4ed46869
KH
;; Rows 48 through 126 of GB2312 are given category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))
177
178;; Chinese character set (BIG5)
179
;; The generic characters of both Big5 charsets receive the
;; categories `c', `C' and `|'.  No syntax entry is set here; the
;; former `modify-syntax-entry' calls to "w" stay disabled.
(dolist (charset '(chinese-big5-1 chinese-big5-2))
  (let ((generic-char (make-char charset)))
    (modify-category-entry generic-char ?c)
    (modify-category-entry generic-char ?C)
    (modify-category-entry generic-char ?\|)))
193
194
195;; Chinese character set (CNS11643)
196
;; The generic character of each CNS11643 plane receives the
;; categories `c', `C' and `|'.  No syntax entry is set here; the
;; former `modify-syntax-entry' call to "w" stays disabled.
(dolist (charset '(chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7))
  (let ((generic-char (make-char charset)))
    (modify-category-entry generic-char ?c)
    (modify-category-entry generic-char ?C)
    (modify-category-entry generic-char ?\|)))
212
213;; Cyrillic character set (ISO-8859-5)
214
215(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
216
28636af6 217(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
4ed46869
KH
218(modify-syntax-entry ?\e,L-\e(B ".")
219(modify-syntax-entry ?\e,Lp\e(B ".")
220(modify-syntax-entry ?\e,L}\e(B ".")
28636af6
RS
221(let ((tbl (standard-case-table)))
222 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
223 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
224 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
225 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
226 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
227 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
228 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
229 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
230 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
231 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
232 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
233 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
234 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
235 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
236 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
237 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
238 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
239 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
240 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
241 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
242 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
243 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
244 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
245 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
246 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
247 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
248 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
249 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
250 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
251 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
252 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
253 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
254 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
255 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
256 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
257 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
258 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
259 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
260 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
261 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
262 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
263 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
264 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
265 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
266 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
269a5dd0
DL
267 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
268 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
269 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
270 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
271 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
272 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
273 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
274 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
275 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
276 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
277 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
278 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
279 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
280 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
281 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
282 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
283 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
284 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
285 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
286 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
287 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
288 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
289 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
290 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
291 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
292 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
293 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
294 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
295 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
296 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
297 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
298 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
299 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
300 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
301 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
303 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
305 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
306 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
307 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
308 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
310 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
4ed46869 314
9395eb7c
KH
315;; Devanagari character set
316
269a5dd0
DL
317;;; Commented out since the categories appear not to be used anywhere
318;;; and word syntax is the default.
319;; (let ((deflist '(;; chars syntax category
320;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
321;; ; chandrabindu, anuswar, visarga
322;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
323;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
324;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
325;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
326;; ;; Unicode equivalents
327;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
328;; ; chandrabindu, anuswar, visarga
329;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
330;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
331;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
332;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
333;; ))
334;; elm chars len syntax category to ch i)
335;; (while deflist
336;; (setq elm (car deflist))
337;; (setq chars (car elm)
338;; len (length chars)
339;; syntax (nth 1 elm)
340;; category (nth 2 elm)
341;; i 0)
342;; (while (< i len)
343;; (if (= (aref chars i) ?-)
344;; (setq i (1+ i)
345;; to (aref chars i))
346;; (setq ch (aref chars i)
347;; to ch))
348;; (while (<= ch to)
349;; (modify-syntax-entry ch syntax)
350;; (modify-category-entry ch category)
351;; (setq ch (1+ ch)))
352;; (setq i (1+ i)))
353;; (setq deflist (cdr deflist))))
9395eb7c 354
4ed46869
KH
355;; Ethiopic character set
356
357(modify-category-entry (make-char 'ethiopic) ?e)
269a5dd0
DL
358;; (modify-syntax-entry (make-char 'ethiopic) "w")
;; Unicode code points U+1200..U+137C (inclusive) get category `e'.
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))
361(let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
362 ;; Unicode equivalents of the above:
363 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
c23d3872
KH
364 (while chars
365 (modify-syntax-entry (car chars) ".")
366 (setq chars (cdr chars))))
4ed46869 367
4ed46869
KH
368;; Greek character set (ISO-8859-7)
369
370(modify-category-entry (make-char 'greek-iso8859-7) ?g)
269a5dd0
DL
;; Unicode code points U+0370..U+03FF (inclusive) get category `g'.
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
269a5dd0
DL
375
376;; (let ((c 182))
377;; (while (< c 255)
378;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
379;; (setq c (1+ c))))
380;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
4ed46869
KH
381(modify-syntax-entry ?\e,F7\e(B ".")
382(modify-syntax-entry ?\e,F;\e(B ".")
383(modify-syntax-entry ?\e,F=\e(B ".")
4b7c7a40 384(let ((tbl (standard-case-table)))
269a5dd0
DL
385 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
386 ;; in several cases.
387 (set-case-syntax ?\e,F!\e(B "." tbl)
388 (set-case-syntax ?\e,F"\e(B "." tbl)
389 (set-case-syntax ?\e,F&\e(B "." tbl)
390 (set-case-syntax ?\e,F&\e(B "_" tbl)
391 (set-case-syntax ?\e,F'\e(B "." tbl)
392 (set-case-syntax ?\e,F)\e(B "_" tbl)
393 (set-case-syntax ?\e,F+\e(B "." tbl)
394 (set-case-syntax ?\e,F,\e(B "_" tbl)
395 (set-case-syntax ?\e,F-\e(B "_" tbl)
396 (set-case-syntax ?\e,F/\e(B "." tbl)
397 (set-case-syntax ?\e,F0\e(B "_" tbl)
398 (set-case-syntax ?\e,F1\e(B "_" tbl)
399;; (set-case-syntax ?\e,F7\e(B "_" tbl)
400;; (set-case-syntax ?\e,F=\e(B "_" tbl)
4b7c7a40
DL
401 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
402 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
403 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
404 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
405 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
406 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
407 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
408 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
409 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
410 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
411 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
412 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
413 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
414 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
415 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
416 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
417 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
418 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
419 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
420 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
421 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
422 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
423 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
424 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
425 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
426 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
427 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
428 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
429 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
430 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
431 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
432 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
269a5dd0
DL
433 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
434 ;; Unicode equivalents
435 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
436 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
437 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
438 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
439 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
440 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
441 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
442 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
443 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
444 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
445 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
446 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
447 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
448 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
449 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
450 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
451 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
452 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
453 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
454 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
455 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
456 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
457 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
458 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
459 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
460 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
461 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
462 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
463 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
464 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
465 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
466 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
467 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
4ed46869
KH
468
469;; Hebrew character set (ISO-8859-8)
470
227f528e
EZ
471(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
;; Unicode code points U+0591..U+05F4 (inclusive) get category `w'.
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))
476
477(modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
478(modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
479(modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
480(modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
481(modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
482(modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
483(modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
4ed46869 484
269a5dd0
DL
485;; (let ((c 224))
486;; (while (< c 251)
487;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
488;; (setq c (1+ c))))
489;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
4ed46869
KH
490
491;; Indian character set (IS 13194 and other Emacs original Indian charsets)
492
493(modify-category-entry (make-char 'indian-is13194) ?i)
494(modify-category-entry (make-char 'indian-2-column) ?I)
69e138b2 495(modify-category-entry (make-char 'indian-glyph) ?I)
269a5dd0
DL
496;; Unicode Devanagari block
;; Unicode code points U+0901..U+0970 (inclusive) get category `i'.
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
4ed46869 501
269a5dd0
DL
502;;; Commented out since the categories appear not to be used anywhere
503;;; and word syntax is the default.
504;; (let ((deflist ;
505;; '(;; chars syntax category
506;; ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
507;; ; chandrabindu, anuswar, visarga
508;; ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
509;; ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
510;; ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
511;; ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
512;; ))
513;; elm chars len syntax category to ch i)
514;; (while deflist
515;; (setq elm (car deflist))
516;; (setq chars (car elm)
517;; len (length chars)
518;; syntax (nth 1 elm)
519;; category (nth 2 elm)
520;; i 0)
521;; (while (< i len)
522;; (if (= (aref chars i) ?-)
523;; (setq i (1+ i)
524;; to (aref chars i))
525;; (setq ch (aref chars i)
526;; to ch))
527;; (while (<= ch to)
528;; (modify-syntax-entry ch syntax)
529;; (modify-category-entry ch category)
530;; (setq ch (1+ ch)))
531;; (setq i (1+ i)))
532;; (setq deflist (cdr deflist))))
4a027a0d
KH
533
534
4ed46869
KH
535;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
536
537(modify-category-entry (make-char 'katakana-jisx0201) ?k)
226e4119 538(modify-category-entry (make-char 'katakana-jisx0201) ?j)
4ed46869
KH
539(modify-category-entry (make-char 'latin-jisx0201) ?r)
540(modify-category-entry (make-char 'japanese-jisx0208) ?j)
541(modify-category-entry (make-char 'japanese-jisx0212) ?j)
9395eb7c 542(modify-category-entry (make-char 'katakana-jisx0201) ?\|)
4ed46869 543(modify-category-entry (make-char 'japanese-jisx0208) ?\|)
9395eb7c 544(modify-category-entry (make-char 'japanese-jisx0212) ?\|)
4ed46869 545
269a5dd0
DL
546;; Unicode equivalents of JISX0201-kana
;; Each code point in U+FF61..U+FF9F (inclusive) is added to the
;; categories `k', `j' and `|', in that order.
(dotimes (offset (1+ (- #xff9f #xff61)))
  (let ((code (+ #xff61 offset)))
    (modify-category-entry (decode-char 'ucs code) ?k)
    (modify-category-entry (decode-char 'ucs code) ?j)
    (modify-category-entry (decode-char 'ucs code) ?\|)))
553
554;; Katakana block
;; Each code point in U+30A0..U+30FF (inclusive) is added to the
;; categories `k', `j' and `|'.
;; `k' is used rather than `K': ?K is double width, ?k isn't specified.
(dotimes (offset (1+ (- #x30ff #x30a0)))
  (let ((code (+ #x30a0 offset)))
    (modify-category-entry (decode-char 'ucs code) ?k)
    (modify-category-entry (decode-char 'ucs code) ?j)
    (modify-category-entry (decode-char 'ucs code) ?\|)))
562
563;; Hiragana block
;; Each code point in U+3040..U+309F (inclusive) is added to the
;; categories `H' and `|'.  ?H is actually defined to be double width.
;; Category `j' is deliberately not assigned here (that call has been
;; disabled).
(dotimes (offset (1+ (- #x309f #x3040)))
  (let ((code (+ #x3040 offset)))
    (modify-category-entry (decode-char 'ucs code) ?H)
    (modify-category-entry (decode-char 'ucs code) ?\|)))
571
4ed46869 572;; JISX0208
269a5dd0 573;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
4ed46869
KH
574(modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
575(modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
576(modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
cf6af551
MB
577(let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
578 (while chars
579 (modify-syntax-entry (car chars) "w")
580 (setq chars (cdr chars))))
4ed46869
KH
581(modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
582(modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
583(modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
584(modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
585(modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
586(modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
587(modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
588(modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
589(modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
590(modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
591
592(modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
593(modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
594(modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
595(modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
596(modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
;; Rows 48 through 126 of JISX0208 are given category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 offset)) ?C))
210dede6
KH
601(modify-category-entry ?\e$B!<\e(B ?K)
602(let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
4ed46869
KH
603 (while chars
604 (modify-category-entry (car chars) ?K)
605 (modify-category-entry (car chars) ?H)
606 (setq chars (cdr chars))))
607(let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
608 (while chars
609 (modify-category-entry (car chars) ?C)
610 (setq chars (cdr chars))))
611
612;; JISX0212
269a5dd0 613;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
4ed46869
KH
614(modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
615(modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
616(modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
617
618(modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
619
620;; JISX0201-Kana
269a5dd0
DL
621;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
622(let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
623 ;; Unicode:
624 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
4ed46869
KH
625 (while chars
626 (modify-syntax-entry (car chars) ".")
627 (setq chars (cdr chars))))
628
226e4119
KH
;; JISX0201-Kana bracket pair.  The first character is an opening
;; delimiter whose matching character is the second one.  The second
;; must therefore be a *closing* delimiter (syntax class `)') whose
;; matching character is the first.  It was previously given class
;; `(' as well, which made both characters openers, so paren matching
;; and sexp motion could never pair them.  This mirrors how every
;; other bracket pair in this file is declared.
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
631
4ed46869
KH
632;; Korean character set (KSC5601)
633
269a5dd0 634;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
4ed46869
KH
635(modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
636(modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
637(modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
638(modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
639(modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
640(modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
641
642(modify-category-entry (make-char 'korean-ksc5601) ?h)
643(modify-category-entry (make-char 'korean-ksc5601 35) ?A)
644(modify-category-entry (make-char 'korean-ksc5601 37) ?G)
645(modify-category-entry (make-char 'korean-ksc5601 42) ?H)
646(modify-category-entry (make-char 'korean-ksc5601 43) ?K)
647(modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
648
d05cfa1f
KH
649;; Latin character set (latin-1,2,3,4,5,8,9)
650
651(modify-category-entry (make-char 'latin-iso8859-1) ?l)
652(modify-category-entry (make-char 'latin-iso8859-2) ?l)
653(modify-category-entry (make-char 'latin-iso8859-3) ?l)
654(modify-category-entry (make-char 'latin-iso8859-4) ?l)
655(modify-category-entry (make-char 'latin-iso8859-9) ?l)
656(modify-category-entry (make-char 'latin-iso8859-14) ?l)
657(modify-category-entry (make-char 'latin-iso8859-15) ?l)
658
659(modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
660(modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
661(modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
662(modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
663(modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
664(modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
665(modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
666
6eba8645
KH
667;; Lao character set
668
669(modify-category-entry (make-char 'lao) ?o)
269a5dd0
DL
;; Unicode code points U+0E80..U+0EFF (inclusive) get category `o'.
(let ((code #xe80))
  (while (<= code #xeff)
    (modify-category-entry (decode-char 'ucs code) ?o)
    (setq code (1+ code))))
6eba8645
KH
672
673(let ((deflist '(;; chars syntax category
674 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
675 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
676 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
677 ("\e(1XY\e(B" "w" ?3) ; vowel lower
a1506d29 678 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
6eba8645 679 ("\e(1\\e(B" "w" ?9) ; semivowel lower
9765a2ba 680 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
d5f05a67 681 ("\e(1Of\e(B" "_" ?5) ; symbol
269a5dd0
DL
682 ;; Unicode equivalents
683 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
684 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
685 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
686 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
a1506d29 687 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
269a5dd0
DL
688 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
689 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
690 ("\e$,1DODf\e(B" "_" ?5) ; symbol
6eba8645
KH
691 ))
692 elm chars len syntax category to ch i)
693 (while deflist
694 (setq elm (car deflist))
695 (setq chars (car elm)
696 len (length chars)
697 syntax (nth 1 elm)
698 category (nth 2 elm)
699 i 0)
700 (while (< i len)
701 (if (= (aref chars i) ?-)
702 (setq i (1+ i)
4a027a0d
KH
703 to (aref chars i))
704 (setq ch (aref chars i)
6eba8645
KH
705 to ch))
706 (while (<= ch to)
269a5dd0
DL
707 (unless (string-equal syntax "w")
708 (modify-syntax-entry ch syntax))
6eba8645
KH
709 (modify-category-entry ch category)
710 (setq ch (1+ ch)))
4a027a0d 711 (setq i (1+ i)))
6eba8645
KH
712 (setq deflist (cdr deflist))))
713
4ed46869
KH
714;; Thai character set (TIS620)
715
716(modify-category-entry (make-char 'thai-tis620) ?t)
269a5dd0
DL
;; Unicode code points U+0E00..U+0E7F (inclusive) get category `t'.
(let ((code #xe00))
  (while (<= code #xe7f)
    (modify-category-entry (decode-char 'ucs code) ?t)
    (setq code (1+ code))))
4ed46869
KH
719
720(let ((deflist '(;; chars syntax category
721 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
722 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
723 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
724 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
a1506d29 725 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
9765a2ba
KH
726 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
727 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
269a5dd0
DL
728 ;; Unicode equivalents
729 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
730 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
731 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
732 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
a1506d29 733 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
269a5dd0
DL
734 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
735 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
4ed46869
KH
736 ))
737 elm chars len syntax category to ch i)
9395eb7c
KH
738 (while deflist
739 (setq elm (car deflist))
740 (setq chars (car elm)
741 len (length chars)
742 syntax (nth 1 elm)
743 category (nth 2 elm)
744 i 0)
745 (while (< i len)
746 (if (= (aref chars i) ?-)
747 (setq i (1+ i)
4a027a0d
KH
748 to (aref chars i))
749 (setq ch (aref chars i)
9395eb7c
KH
750 to ch))
751 (while (<= ch to)
269a5dd0
DL
752 (unless (string-equal syntax "w")
753 (modify-syntax-entry ch syntax))
9395eb7c
KH
754 (modify-category-entry ch category)
755 (setq ch (1+ ch)))
4a027a0d 756 (setq i (1+ i)))
9395eb7c
KH
757 (setq deflist (cdr deflist))))
758
;; Tibetan character set

;; Put the generic characters of both Tibetan charsets and every UCS
;; code point from #xf00 through #xfff into category ?q.
(dolist (charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char charset) ?q))
(let ((code #xf00))
  (while (<= code #xfff)
    (modify-category-entry (decode-char 'ucs code) ?q)
    (setq code (1+ code))))

;; Each row of the table is (CHARS SYNTAX CATEGORY).  CHARS is scanned
;; left to right: a plain character stands for itself, and `-' extends
;; the previously handled character up to the character that follows
;; the dash.  Every character covered gets CATEGORY; it also gets
;; SYNTAX unless SYNTAX is "w", in which case its syntax is left alone.
;; A character may appear in several rows and then receives each of
;; the listed categories.  `ch' and `to' are bound outside the row loop
;; so that they carry over between rows exactly as a single set of
;; loop variables would.
(let (ch to)
  (dolist (row '(;; chars	syntax	category
		 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B"	"w"	?0) ; consonant
		 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B"	"w"	?0) ;
		 ("\e$(7$!\e(B-\e$(7$e\e(B"	"w"	?0) ;
		 ("\e$(7%!\e(B-\e$(7%u\e(B"	"w"	?0) ;
		 ("\e$(7"S"["\"]"^"a\e(B"	"w"	?2) ; upper vowel
		 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B"	"w"	?2) ; upper modifier
		 ("\e$(7!I"Q"R"U"e!e!g\e(B"	"w"	?3) ; lower vowel/modifier
		 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B"	"w"	?6) ; digit
		 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B"	"."	?|) ; line-break char
		 ("\e$(8!;!=!?!@!A!D"`\e(B"	"."	?|) ;
		 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B"	"."	?>) ; prohibition
		 ("\e$(8!;!=!?!@!A!D"`\e(B"	"."	?>) ;
		 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B"	"."	?<) ; prohibition
		 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B"	"."	?q) ; others

		 ;; Unicode version (not complete)
		 ("\e$,1F \e(B-\e$,1FIFJ\e(B"	"w"	?0) ; consonant
		 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B"	"w"	?0) ;
		 ("\e$,1FRFZF[F\F]F`\e(B"	"w"	?2) ; upper vowel
		 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B"	"w"	?2) ; upper modifier
		 ("\e$,1EYFPFQFTFdEuEw\e(B"	"w"	?3) ; lower vowel/modifier
		 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B"	"w"	?6) ; digit
		 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B"	"."	?|) ; line-break char
		 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B"	"."	?>) ; prohibition
		 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B"	"."	?<) ; prohibition
		 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B"	"."	?q) ; others
		 ))
    (let* ((chars (car row))
	   (len (length chars))
	   (syntax (nth 1 row))
	   (category (nth 2 row))
	   (i 0))
      (while (< i len)
	(cond ((= (aref chars i) ?-)
	       ;; Range: keep `ch' where the previous step left it and
	       ;; run up to the character after the dash.
	       (setq i (1+ i)
		     to (aref chars i)))
	      (t
	       (setq ch (aref chars i)
		     to ch)))
	(while (<= ch to)
	  (or (string-equal syntax "w")
	      (modify-syntax-entry ch syntax))
	  (modify-category-entry ch category)
	  (setq ch (1+ ch)))
	(setq i (1+ i))))))
815
;; Vietnamese character set

;; Both VISCII halves get category ?v, and additionally ?l so that
;; they can form a word together with Latin characters.  Their syntax
;; is not touched here (the `modify-syntax-entry' calls stay disabled).
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((generic (make-char charset)))
    ;; (modify-syntax-entry generic "w")
    (modify-category-entry generic ?v)
    (modify-category-entry generic ?l)))
827
e5dd1155
KH
;; In the standard case table, pair the upper-case VISCII character at
;; each code 32..127 with the lower-case VISCII character at the same
;; code.
(let ((case-table (standard-case-table)))
  (dotimes (offset (- 128 32))
    (let ((code (+ 32 offset)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
			    (make-char 'vietnamese-viscii-lower code)
			    case-table))))
835
d05cfa1f
KH
836;; Unicode (mule-unicode-0100-24ff)
837
85ef8ece
KH
838(let ((tbl (standard-case-table)) c)
839
840;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
841;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
842;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
843;; Thus we have to check language-environment to handle casing
844;; correctly. Currently only I<->i is available.
845
85ef8ece
KH
;; Latin Extended-A, Latin Extended-B
;; (`c' and `tbl' are bound by the enclosing `let'.)
;; Every code point #x0100..#x0233 goes into category ?l.  Case pairs
;; are only made at even code points:
;;   up to #x012e and #x014a..#x0177: C is upper case, C+1 lower case;
;;   #x013a..#x0148:                  C-1 is upper case, C lower case.
(setq c #x0100)
(while (<= c #x0233)
  (let ((even (= 0 (logand c 1))))
    (modify-category-entry (decode-char 'ucs c) ?l)
    (cond ((not even))
	  ((or (<= c #x012e)
	       (and (>= c #x014a) (<= c #x0177)))
	   (set-case-syntax-pair
	    (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
	  ((and (>= c #x013a) (<= c #x0148))
	   (set-case-syntax-pair
	    (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))))
  (setq c (1+ c)))
85ef8ece
KH
861 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
862 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
863 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
8325c01e 864;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
d05cfa1f
KH
865 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
866 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
867 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
868
269a5dd0
DL
869 ;; Latin Extended-B
870 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
871 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
872 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
873 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
874 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
875 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
876 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
877 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
878 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
879 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
880 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
881 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
882 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
883 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
884 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
885 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
886 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
887 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
888 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
889 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
892 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
893 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
894 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
906 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
907 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
908 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
909 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
910 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
911 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
912 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
913 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
914 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
915 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
916 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
917 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
918 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
919 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
920 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
921 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
922 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
923 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
924 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
925 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
926 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
927 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
928 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
929 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
930 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
955 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
956 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
957 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
958 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
959 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
960 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
961 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
962 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
963 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
964
85ef8ece
KH
;; Latin Extended Additional
;; (`c' and `tbl' are bound by the enclosing `let'.)
;; Every code point #x1e00..#x1ef9 goes into category ?l.  An even
;; code point C is paired with C+1 as upper/lower case, except for
;; those lying strictly between #x1e94 and #x1ea0.
(setq c #x1e00)
(while (<= c #x1ef9)
  (modify-category-entry (decode-char 'ucs c) ?l)
  (if (and (= (% c 2) 0)
	   (not (and (> c #x1e94) (< c #x1ea0))))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
  (setq c (1+ c)))
974
85ef8ece
KH
;; Greek
;; (`c' and `tbl' are bound by the enclosing `let'.)
;; Every code point #x0370..#x03ff goes into category ?g.
;;   #x0391..#x03ab except #x03a2: C is upper case, C+32 lower case;
;;   even C in #x03da..#x03ee:     C is upper case, C+1 lower case.
(setq c #x0370)
(while (<= c #x03ff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (cond ((and (>= c #x0391) (<= c #x03ab) (/= c #x03a2))
	 (set-case-syntax-pair
	  (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
	((and (>= c #x03da) (<= c #x03ee) (= (% c 2) 0))
	 (set-case-syntax-pair
	  (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
989 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
990 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
991 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
992 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
993 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
994 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
995 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
996
269a5dd0
DL
;; Armenian
;; (`tbl' is bound by the enclosing `let'.)
;; Pair each of #x531..#x556 (upper case) with the code point #x30
;; above it (lower case).
(dotimes (offset (1+ (- #x556 #x531)))
  (let ((upper (+ #x531 offset)))
    (set-case-syntax-pair (decode-char 'ucs upper)
			  (decode-char 'ucs (+ upper #x30)) tbl)))
1003
85ef8ece
KH
;; Greek Extended
;; (`c' and `tbl' are bound by the enclosing `let'.)
;; Every code point #x1f00..#x1fff goes into category ?g.  Up to
;; #x1fa7, a code point C whose low nibble is 0..7 is the lower-case
;; partner of C+8, except for:
;;   - #x1f50, #x1f52, #x1f54 and #x1f56;
;;   - the whole #x1f70 row, whose letters are paired individually by
;;     the explicit `set-case-syntax-pair' calls after this loop.
(setq c #x1f00)
(while (<= c #x1fff)
  (modify-category-entry (decode-char 'ucs c) ?g)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; Skip the #x1f70 row.  This used to compare against 7, which
       ;; a value masked with #x00f0 can never equal, so the row was
       ;; not skipped and #x1f78..#x1f7f were wrongly recorded as the
       ;; upper case of #x1f70..#x1f77.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair
	(decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
  (setq c (1+ c)))
1015 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1027 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1028 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1029 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1030 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1031 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1032 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1033 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1034 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1039
85ef8ece
KH
;; Cyrillic
;; (`c' and `tbl' are bound by the enclosing `let'.)
;; Every code point #x0400..#x04ff goes into category ?y.
;;   #x0400..#x040f: C is upper case, C+80 lower case;
;;   #x0410..#x042f: C is upper case, C+32 lower case;
;;   even C in #x0460..#x0480, #x048c..#x04be or #x04d0..#x04f4:
;;                   C is upper case, C+1 lower case.
(setq c #x0400)
(while (<= c #x04ff)
  (modify-category-entry (decode-char 'ucs c) ?y)
  (cond ((<= c #x040f)
	 (set-case-syntax-pair
	  (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
	((<= c #x042f)
	 (set-case-syntax-pair
	  (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
	((and (= (% c 2) 0)
	      (or (and (>= c #x0460) (<= c #x0480))
		  (and (>= c #x048c) (<= c #x04be))
		  (and (>= c #x04d0) (<= c #x04f4))))
	 (set-case-syntax-pair
	  (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
  (setq c (1+ c)))
1059 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1063 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1064
85ef8ece
KH
;; General punctuation
;; (`c' and `tbl' are bound by the enclosing `let'.)
;; #x2000..#x200b get whitespace syntax and #x2010..#x2027 get symbol
;; syntax in TBL.
;;
;; `c' must stay a UCS code point for the whole loop.  It used to be
;; advanced with (setq c (decode-char 'ucs (1+ c))), which stored an
;; Emacs character (or nil) in it instead; whenever that value differs
;; from the UCS code point, the `<=' test fails or errors after the
;; first iteration and only the first code point of each range is set.
(setq c #x2000)
(while (<= c #x200b)
  (set-case-syntax (decode-char 'ucs c) " " tbl)
  (setq c (1+ c)))
(setq c #x2010)
(while (<= c #x2027)
  (set-case-syntax (decode-char 'ucs c) "_" tbl)
  (setq c (1+ c)))
d05cfa1f 1074
269a5dd0
DL
;; Roman numerals
;; (`tbl' is bound by the enclosing `let'.)
;; #x2160..#x216f are upper case; the lower case is #x10 above.
(dotimes (offset (1+ (- #x216f #x2160)))
  (let ((upper (+ #x2160 offset)))
    (set-case-syntax-pair (decode-char 'ucs upper)
			  (decode-char 'ucs (+ upper #x10)) tbl)))

;; Circled Latin
;; #x24b6..#x24cf are upper case; the lower case is 26 above.  Both
;; members of each pair go into category ?l.
(dotimes (offset (1+ (- #x24cf #x24b6)))
  (let* ((upper (+ #x24b6 offset))
	 (lower (+ upper 26)))
    (set-case-syntax-pair (decode-char 'ucs upper)
			  (decode-char 'ucs lower) tbl)
    (modify-category-entry (decode-char 'ucs upper) ?l)
    (modify-category-entry (decode-char 'ucs lower) ?l)))

;; Fullwidth Latin
;; #xff21..#xff3a are upper case; the lower case is #x20 above.  Both
;; members of each pair go into category ?l.
(dotimes (offset (1+ (- #xff3a #xff21)))
  (let* ((upper (+ #xff21 offset))
	 (lower (+ upper #x20)))
    (set-case-syntax-pair (decode-char 'ucs upper)
			  (decode-char 'ucs lower) tbl)
    (modify-category-entry (decode-char 'ucs upper) ?l)
    (modify-category-entry (decode-char 'ucs lower) ?l)))

;; Combining diacritics: #x300..#x362 go into category ?^.
(dotimes (offset (1+ (- #x362 #x300)))
  (modify-category-entry (decode-char 'ucs (+ #x300 offset)) ?^))

;; Combining marks: #x20d0..#x20e3 go into category ?^.
(dotimes (offset (1+ (- #x20e3 #x20d0)))
  (modify-category-entry (decode-char 'ucs (+ #x20d0 offset)) ?^))
1111
1112 ;; Fixme: syntax for symbols &c
1113 )
4ed46869
KH
1114\f
;;; Setting word boundary.

;; Category pairs that combine into one word, and category pairs
;; between which a word boundary is placed.
(setq word-combining-categories
      (list (cons ?l ?l))		; Latin - Latin

      word-separating-categories	; (2-byte character sets)
      (list (cons ?A ?K)		; Alpha numeric - Katakana
	    (cons ?A ?C)		; Alpha numeric - Chinese
	    (cons ?H ?A)		; Hiragana - Alpha numeric
	    (cons ?H ?K)		; Hiragana - Katakana
	    (cons ?H ?C)		; Hiragana - Chinese
	    (cons ?K ?A)		; Katakana - Alpha numeric
	    (cons ?K ?C)		; Katakana - Chinese
	    (cons ?C ?A)		; Chinese - Alpha numeric
	    (cons ?C ?K)))		; Chinese - Katakana
777cfce6
KH
1131
1132\f
;; For each character set, record the coding system best suited to
;; encoding it in the charset's `preferred-coding-system' property.

(dolist (entry '((latin-iso8859-1 . iso-latin-1)
		 (latin-iso8859-2 . iso-latin-2)
		 (latin-iso8859-3 . iso-latin-3)
		 (latin-iso8859-4 . iso-latin-4)
		 (thai-tis620 . thai-tis620)
		 (greek-iso8859-7 . greek-iso-8bit)
		 (arabic-iso8859-6 . iso-2022-7bit)
		 (hebrew-iso8859-8 . hebrew-iso-8bit)
		 (katakana-jisx0201 . japanese-shift-jis)
		 (latin-jisx0201 . japanese-shift-jis)
		 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
		 (latin-iso8859-9 . iso-latin-5)
		 (japanese-jisx0208-1978 . iso-2022-jp)
		 (chinese-gb2312 . cn-gb-2312)
		 (japanese-jisx0208 . iso-2022-jp)
		 (korean-ksc5601 . iso-2022-kr)
		 (japanese-jisx0212 . iso-2022-jp)
		 (chinese-cns11643-1 . iso-2022-cn)
		 (chinese-cns11643-2 . iso-2022-cn)
		 (chinese-big5-1 . chinese-big5)
		 (chinese-big5-2 . chinese-big5)
		 (chinese-sisheng . iso-2022-7bit)
		 (ipa . iso-2022-7bit)
		 (vietnamese-viscii-lower . vietnamese-viscii)
		 (vietnamese-viscii-upper . vietnamese-viscii)
		 (arabic-digit . iso-2022-7bit)
		 (arabic-1-column . iso-2022-7bit)
		 (ascii-right-to-left . iso-2022-7bit)
		 (lao . lao)
		 (arabic-2-column . iso-2022-7bit)
		 (indian-is13194 . devanagari)
		 (indian-glyph . devanagari)
		 (tibetan-1-column . tibetan)
		 (ethiopic . iso-2022-7bit)
		 (chinese-cns11643-3 . iso-2022-cn)
		 (chinese-cns11643-4 . iso-2022-cn)
		 (chinese-cns11643-5 . iso-2022-cn)
		 (chinese-cns11643-6 . iso-2022-cn)
		 (chinese-cns11643-7 . iso-2022-cn)
		 (indian-2-column . devanagari)
		 (tibetan . tibetan)
		 (latin-iso8859-14 . iso-latin-8)
		 (latin-iso8859-15 . iso-latin-9)))
  ;; ENTRY is (CHARSET . CODING-SYSTEM).
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
df0415c5
KH
1182
1183\f
;; Set up `auto-fill-chars' for the charsets that should invoke
;; auto-filling; SPACE and NEWLINE are already set.  The same charsets
;; also get the `nospace-between-words' property.
(dolist (charset '(katakana-jisx0201
		   japanese-jisx0208 japanese-jisx0212
		   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
777cfce6
KH
1194
1195;;; Local Variables:
1196;;; coding: iso-2022-7bit
1197;;; End:
1198
60370d40 1199;;; characters.el ends here