(windows-1250, windows-125[2-8])
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
28636af6 3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
cf6af551 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4ed46869
KH
6
7;; Keywords: multibyte character, character set, syntax, category
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
369314dc
KH
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
4ed46869
KH
25
26;;; Commentary:
27
28;; This file contains multibyte characters. Save this file always in
4b16fa0c 29;; the coding system `iso-2022-7bit'.
4ed46869 30
49adf443
RS
31;; This file does not define the syntax for Latin-N character sets;
32;; those are defined by the files latin-N.el.
33
60370d40
PJ
34;;; Code:
35
ae0916f8
KH
36;; We must set utf-translate-cjk-mode to nil while loading this file
37;; to avoid translating CJK characters in decode-char.
38(defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
39(setq utf-translate-cjk-mode nil)
40
4ed46869
KH
41;;; Predefined categories.
42
43;; For each character set.
44
;; Define every predefined category from one table.  Each entry is
;; (CATEGORY . DOCSTRING); entries are registered in the order listed.
(dolist (entry
         '(;; For each character set.
           (?a . "ASCII")
           (?l . "Latin")
           (?t . "Thai")
           (?g . "Greek")
           (?b . "Arabic")
           (?w . "Hebrew")
           (?y . "Cyrillic")
           (?k . "Japanese katakana")
           (?r . "Japanese roman")
           (?c . "Chinese")
           (?j . "Japanese")
           (?h . "Korean")
           (?e . "Ethiopic (Ge'ez)")
           (?v . "Vietnamese")
           (?i . "Indian")
           (?o . "Lao")
           (?q . "Tibetan")

           ;; For each group (row) of 2-byte character sets.
           (?A . "Alpha-numeric characters of 2-byte character sets")
           (?C . "Chinese (Han) characters of 2-byte character sets")
           (?G . "Greek characters of 2-byte character sets")
           (?H . "Japanese Hiragana characters of 2-byte character sets")
           (?K . "Japanese Katakana characters of 2-byte character sets")
           (?N . "Korean Hangul characters of 2-byte character sets")
           (?Y . "Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")

           ;; For phonetic classifications.
           (?0 . "consonant")
           (?1 . "base (independent) vowel")
           (?2 . "upper diacritical mark (including upper vowel)")
           (?3 . "lower diacritical mark (including lower vowel)")
           (?4 . "tone mark")
           (?5 . "symbol")
           (?6 . "digit")
           (?7 . "vowel-modifying diacritical mark")
           (?8 . "vowel-signs")
           (?9 . "semivowel lower")

           ;; For filling.
           (?| . "While filling, we can break a line at this character.")

           ;; For indentation calculation.
           (?\s . "This character counts as a space for indentation purposes.")

           ;; Keep the following for `kinsoku' processing.  See comments
           ;; in kinsoku.el.
           (?> . "A character which can't be placed at beginning of line.")
           (?< . "A character which can't be placed at end of line.")

           ;; Combining
           (?^ . "Combining diacritic or mark")))
  (define-category (car entry) (cdr entry)))
4ed46869
KH
101\f
102;;; Setting syntax and category.
103
104;; ASCII
105
;; Character codes 32 through 126 get both the category `a' (ASCII)
;; and the category `l' (Latin).
(dotimes (offset (- 127 32))
  (let ((ascii (+ 32 offset)))
    (modify-category-entry ascii ?a)
    (modify-category-entry ascii ?l)))
111
112;; Arabic character set
113
;; Put the generic character of each Arabic charset in category `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  ;; (modify-syntax-entry (make-char charset) "w")
  (modify-category-entry (make-char charset) ?b))
269a5dd0
DL
;; Give category `b' to every character decoded from these UCS code
;; ranges.  Each entry is (FIRST . LAST), both ends inclusive.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (last (cdr range)))
    (while (<= code last)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
4ed46869
KH
134
135;; Chinese character set (GB2312)
136
269a5dd0 137;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
4ed46869
KH
138(modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
139(modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
140(modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
141(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
142(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
143(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
144(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
145(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
146(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
147(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
840f8f73
KH
148(modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
149(modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
150(modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
4ed46869
KH
151(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
152(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
153(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
154(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
155(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
156(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
157(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
840f8f73
KH
158(modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
159(modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
160(modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
4ed46869 161
840f8f73
KH
162(let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
163 (dotimes (i (length chars))
164 (modify-syntax-entry (aref chars i) ".")))
165
4ed46869
KH
166(modify-category-entry (make-char 'chinese-gb2312) ?c)
167(modify-category-entry (make-char 'chinese-gb2312) ?\|)
168(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
169(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
170(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
171(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
172(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
4ed46869
KH
;; Rows 48 through 126 of chinese-gb2312 get category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))
177
178;; Chinese character set (BIG5)
179
840f8f73
KH
;; Assign syntax to runs of BIG5 characters.  Each entry is
;; (START END SYNTAX): START and END are BIG5 codes, and every character
;; from the one decoded from START up to, but not including, the one
;; decoded from END receives SYNTAX.
(dolist (spec '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((char (decode-big5-char (nth 0 spec)))
        (limit (decode-big5-char (nth 1 spec)))
        (syntax (nth 2 spec)))
    (while (< char limit)
      (modify-syntax-entry char syntax)
      (setq char (1+ char)))))
195
196(let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
197 open close)
198 (dotimes (i (/ (length parens) 2))
199 (setq open (aref parens (* i 2))
200 close (aref parens (1+ (* i 2))))
201 (modify-syntax-entry open (format "(%c" close))
202 (modify-syntax-entry close (format ")%c" open))))
203
4ed46869
KH
;; Both BIG5 generic characters receive the categories `c', `C' and `|'.
;; The category loop is outermost, so each category is applied to
;; chinese-big5-1 and then to chinese-big5-2 before the next category.
(let ((generic-chars (list (make-char 'chinese-big5-1)
                           (make-char 'chinese-big5-2))))
  ;; (dolist (generic generic-chars)
  ;;   (modify-syntax-entry generic "w"))
  (dolist (category '(?c ?C ?\|))
    (dolist (generic generic-chars)
      (modify-category-entry generic category))))
217
218
219;; Chinese character set (CNS11643)
220
;; The generic character of each CNS11643 plane receives the
;; categories `c', `C' and `|', in that order.
(dolist (charset '(chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7))
  (let ((generic (make-char charset)))
    ;; (modify-syntax-entry generic "w")
    (dolist (category '(?c ?C ?|))
      (modify-category-entry generic category))))
236
70abfe90
KH
237(let ((parens "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
238 open close)
239 (dotimes (i (/ (length parens) 2))
240 (setq open (aref parens (* i 2))
241 close (aref parens (1+ (* i 2))))
242 (modify-syntax-entry open (format "(%c" close))
243 (modify-syntax-entry close (format ")%c" open))))
244
4ed46869
KH
245;; Cyrillic character set (ISO-8859-5)
246
247(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
248
28636af6 249(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
4ed46869
KH
250(modify-syntax-entry ?\e,L-\e(B ".")
251(modify-syntax-entry ?\e,Lp\e(B ".")
252(modify-syntax-entry ?\e,L}\e(B ".")
28636af6
RS
253(let ((tbl (standard-case-table)))
254 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
255 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
256 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
257 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
258 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
259 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
260 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
261 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
262 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
263 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
264 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
265 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
266 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
267 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
268 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
269 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
270 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
271 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
272 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
273 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
274 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
275 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
276 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
277 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
278 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
279 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
280 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
281 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
282 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
283 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
284 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
285 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
286 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
287 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
288 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
289 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
290 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
291 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
292 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
293 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
294 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
295 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
296 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
297 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
298 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
269a5dd0
DL
299 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
300 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
301 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
303 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
305 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
306 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
307 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
308 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
310 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
314 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
315 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
316 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
317 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
318 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
319 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
320 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
321 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
322 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
323 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
331 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
332 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
4ed46869 346
9395eb7c
KH
347;; Devanagari character set
348
269a5dd0
DL
349;;; Commented out since the categories appear not to be used anywhere
350;;; and word syntax is the default.
351;; (let ((deflist '(;; chars syntax category
352;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
353;; ; chandrabindu, anuswar, visarga
354;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
355;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
356;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
357;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
358;; ;; Unicode equivalents
359;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
360;; ; chandrabindu, anuswar, visarga
361;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
362;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
363;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
364;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
365;; ))
366;; elm chars len syntax category to ch i)
367;; (while deflist
368;; (setq elm (car deflist))
369;; (setq chars (car elm)
370;; len (length chars)
371;; syntax (nth 1 elm)
372;; category (nth 2 elm)
373;; i 0)
374;; (while (< i len)
375;; (if (= (aref chars i) ?-)
376;; (setq i (1+ i)
377;; to (aref chars i))
378;; (setq ch (aref chars i)
379;; to ch))
380;; (while (<= ch to)
381;; (modify-syntax-entry ch syntax)
382;; (modify-category-entry ch category)
383;; (setq ch (1+ ch)))
384;; (setq i (1+ i)))
385;; (setq deflist (cdr deflist))))
9395eb7c 386
4ed46869
KH
387;; Ethiopic character set
388
389(modify-category-entry (make-char 'ethiopic) ?e)
269a5dd0
DL
390;; (modify-syntax-entry (make-char 'ethiopic) "w")
391(dotimes (i (1+ (- #x137c #x1200)))
392 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
393(let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
394 ;; Unicode equivalents of the above:
395 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
c23d3872
KH
396 (while chars
397 (modify-syntax-entry (car chars) ".")
398 (setq chars (cdr chars))))
4ed46869 399
4ed46869
KH
400;; Greek character set (ISO-8859-7)
401
402(modify-category-entry (make-char 'greek-iso8859-7) ?g)
269a5dd0
DL
;; UCS codes #x370 through #x3ff (inclusive) get category `g'.
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
269a5dd0
DL
407
408;; (let ((c 182))
409;; (while (< c 255)
410;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
411;; (setq c (1+ c))))
412;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
4ed46869
KH
413(modify-syntax-entry ?\e,F7\e(B ".")
414(modify-syntax-entry ?\e,F;\e(B ".")
415(modify-syntax-entry ?\e,F=\e(B ".")
;; Install syntax and upper/lower case pairs for the Greek characters in
;; the standard case table: first the non-letter syntax entries, then
;; the greek-iso8859-7 letter pairs, then their Unicode equivalents.
(let ((tbl (standard-case-table)))
  ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
  ;; in several cases.
  (set-case-syntax ?\e,F!\e(B "." tbl)
  (set-case-syntax ?\e,F"\e(B "." tbl)
  ;; This character is registered once, with syntax "_".  An earlier
  ;; duplicate call gave it "." and was immediately overridden, so it
  ;; had no effect.
  (set-case-syntax ?\e,F&\e(B "_" tbl)
  (set-case-syntax ?\e,F'\e(B "." tbl)
  (set-case-syntax ?\e,F)\e(B "_" tbl)
  (set-case-syntax ?\e,F+\e(B "." tbl)
  (set-case-syntax ?\e,F,\e(B "_" tbl)
  (set-case-syntax ?\e,F-\e(B "_" tbl)
  (set-case-syntax ?\e,F/\e(B "." tbl)
  (set-case-syntax ?\e,F0\e(B "_" tbl)
  (set-case-syntax ?\e,F1\e(B "_" tbl)
;;  (set-case-syntax ?\e,F7\e(B "_" tbl)
;;  (set-case-syntax ?\e,F=\e(B "_" tbl)
  (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
  (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
  (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
  (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
  (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
  (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
  (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
  (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
  (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
  (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
  (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
  (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
  (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
  (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
  (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
  (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
  (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
  ;; Registered with `set-upcase-syntax' rather than as a full pair;
  ;; the same upper-case character is paired on the next line.
  (set-upcase-syntax ?\e,FS\e(B ?\e,Fr\e(B tbl)
  (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
  (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
  (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
  (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
  (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
  (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
  (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
  (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
  (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
  (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
  (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
  (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
  (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
  (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
  (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
  (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
  ;; Unicode equivalents
  (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
  (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
  (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
  (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
  (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
  (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
  (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
  (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
  (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
  (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
  (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
  (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
  (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
  (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
  (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
  (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
  (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
  (set-upcase-syntax ?\e$,1'#\e(B ?\e$,1'B\e(B tbl)
  (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
  (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
  (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
  (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
  (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
  (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
  (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
  (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
  (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
  (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
  (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
  (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
  (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
  (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
  (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
  (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
4ed46869
KH
502
503;; Hebrew character set (ISO-8859-8)
504
227f528e
EZ
505(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
;; UCS codes #x591 through #x5f4 (inclusive) get category `w'.
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))
510
511(modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
512(modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
513(modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
514(modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
515(modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
516(modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
517(modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
4ed46869 518
269a5dd0
DL
519;; (let ((c 224))
520;; (while (< c 251)
521;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
522;; (setq c (1+ c))))
523;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
4ed46869
KH
524
525;; Indian character set (IS 13194 and other Emacs original Indian charsets)
526
527(modify-category-entry (make-char 'indian-is13194) ?i)
528(modify-category-entry (make-char 'indian-2-column) ?I)
69e138b2 529(modify-category-entry (make-char 'indian-glyph) ?I)
269a5dd0
DL
;; Unicode Devanagari block: UCS codes #x901 through #x970 (inclusive)
;; get category `i'.
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
4ed46869 535
ed459fb6
KH
;; Apply phonetic categories to nine UCS blocks that start at the base
;; codes listed below.  Each table entry is (FIRST LAST CATEGORY), where
;; FIRST and LAST are inclusive offsets from a block's base code.
(let ((offset-ranges '((#x01 #x03 ?7)   ; vowel modifier
                       (#x05 #x14 ?1)   ; base vowel
                       (#x15 #x39 ?0)   ; consonants
                       (#x3e #x4d ?8)   ; vowel modifier
                       (#x51 #x54 ?4)   ; stress/tone mark
                       (#x58 #x5f ?0)   ; consonants
                       (#x60 #x61 ?1)   ; base vowel
                       (#x62 #x63 ?8)   ; vowel modifier
                       (#x66 #x6f ?6)))) ; digits
  (dolist (block-base '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
    (dolist (range offset-ranges)
      (let ((code (+ block-base (nth 0 range)))
            (last (+ block-base (nth 1 range)))
            (category (nth 2 range)))
        (while (<= code last)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
4a027a0d 555
4ed46869
KH
556;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
557
558(modify-category-entry (make-char 'katakana-jisx0201) ?k)
226e4119 559(modify-category-entry (make-char 'katakana-jisx0201) ?j)
4ed46869
KH
560(modify-category-entry (make-char 'latin-jisx0201) ?r)
561(modify-category-entry (make-char 'japanese-jisx0208) ?j)
562(modify-category-entry (make-char 'japanese-jisx0212) ?j)
9395eb7c 563(modify-category-entry (make-char 'katakana-jisx0201) ?\|)
4ed46869 564(modify-category-entry (make-char 'japanese-jisx0208) ?\|)
9395eb7c 565(modify-category-entry (make-char 'japanese-jisx0212) ?\|)
4ed46869 566
269a5dd0
DL
;; Unicode equivalents of JISX0201-kana: UCS codes #xff61 through
;; #xff9f (inclusive) get the categories `k', `j' and `|'.
(dotimes (offset (1+ (- #xff9f #xff61)))
  (let ((kana (decode-char 'ucs (+ #xff61 offset))))
    (dolist (category '(?k ?j ?\|))
      (modify-category-entry kana category))))

;; Katakana block: UCS codes #x30a0 through #x30ff (inclusive).
(dotimes (offset (1+ (- #x30ff #x30a0)))
  (let ((kana (decode-char 'ucs (+ #x30a0 offset))))
    ;; ?K is double width, ?k isn't specified
    (dolist (category '(?k ?j ?\|))
      (modify-category-entry kana category))))

;; Hiragana block: UCS codes #x3040 through #x309f (inclusive).
(dotimes (offset (1+ (- #x309f #x3040)))
  (let ((kana (decode-char 'ucs (+ #x3040 offset))))
    ;; ?H is actually defined to be double width
    (modify-category-entry kana ?H)
    ;;(modify-category-entry kana ?j)
    (modify-category-entry kana ?\|)))
592
4ed46869 593;; JISX0208
269a5dd0 594;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
4ed46869
KH
595(modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
596(modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
597(modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
cf6af551
MB
598(let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
599 (while chars
600 (modify-syntax-entry (car chars) "w")
601 (setq chars (cdr chars))))
70abfe90
KH
602(let ((parens "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
603 open close)
604 (dotimes (i (/ (length parens) 2))
605 (setq open (aref parens (* i 2))
606 close (aref parens (1+ (* i 2))))
607 (modify-syntax-entry open (format "(%c" close))
608 (modify-syntax-entry close (format ")%c" open))))
4ed46869
KH
609
610(modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
611(modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
612(modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
613(modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
614(modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
615(let ((row 48))
616 (while (< row 127)
617 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
618 (setq row (1+ row))))
210dede6
KH
619(modify-category-entry ?\e$B!<\e(B ?K)
620(let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
4ed46869
KH
621 (while chars
622 (modify-category-entry (car chars) ?K)
623 (modify-category-entry (car chars) ?H)
624 (setq chars (cdr chars))))
625(let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
626 (while chars
627 (modify-category-entry (car chars) ?C)
628 (setq chars (cdr chars))))
629
630;; JISX0212
269a5dd0 631;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
4ed46869
KH
632(modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
633(modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
634(modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
635
636(modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
637
638;; JISX0201-Kana
269a5dd0
DL
639;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
640(let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
641 ;; Unicode:
642 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
4ed46869
KH
643 (while chars
644 (modify-syntax-entry (car chars) ".")
645 (setq chars (cdr chars))))
646
226e4119
KH
;; Pair the two JISX0201-Kana bracket characters: the first is an open
;; delimiter whose match is the second, and the second is the
;; corresponding close delimiter (class ")") whose match is the first.
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
649
4ed46869
KH
650;; Korean character set (KSC5601)
651
269a5dd0 652;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
4ed46869
KH
653(modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
654(modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
655(modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
656(modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
657(modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
658(modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
659
660(modify-category-entry (make-char 'korean-ksc5601) ?h)
661(modify-category-entry (make-char 'korean-ksc5601 35) ?A)
662(modify-category-entry (make-char 'korean-ksc5601 37) ?G)
663(modify-category-entry (make-char 'korean-ksc5601 42) ?H)
664(modify-category-entry (make-char 'korean-ksc5601 43) ?K)
665(modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
666
013d402e 667(let ((parens "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
70abfe90
KH
668 open close)
669 (dotimes (i (/ (length parens) 2))
670 (setq open (aref parens (* i 2))
671 close (aref parens (1+ (* i 2))))
672 (modify-syntax-entry open (format "(%c" close))
673 (modify-syntax-entry close (format ")%c" open))))
674
d05cfa1f
KH
675;; Latin character set (latin-1,2,3,4,5,8,9)
676
677(modify-category-entry (make-char 'latin-iso8859-1) ?l)
678(modify-category-entry (make-char 'latin-iso8859-2) ?l)
679(modify-category-entry (make-char 'latin-iso8859-3) ?l)
680(modify-category-entry (make-char 'latin-iso8859-4) ?l)
681(modify-category-entry (make-char 'latin-iso8859-9) ?l)
682(modify-category-entry (make-char 'latin-iso8859-14) ?l)
683(modify-category-entry (make-char 'latin-iso8859-15) ?l)
684
685(modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
686(modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
687(modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
688(modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
689(modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
690(modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
691(modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
692
6eba8645
KH
693;; Lao character set
694
695(modify-category-entry (make-char 'lao) ?o)
269a5dd0
DL
696(dotimes (i (1+ (- #xeff #xe80)))
697 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
6eba8645
KH
698
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS lists the
;; characters concerned; "X-Y" inside it denotes the inclusive range
;; from X to Y.  Every listed character gets CATEGORY, and also gets
;; SYNTAX unless SYNTAX is "w", in which case no syntax entry is
;; written.
(let ((deflist '(;; chars syntax category
                 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
                 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
                 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
                 ("\e(1XY\e(B" "w" ?3) ; vowel lower
                 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
                 ("\e(1\\e(B" "w" ?9) ; semivowel lower
                 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
                 ("\e(1Of\e(B" "_" ?5) ; symbol
                 ;; Unicode equivalents
                 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
                 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
                 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
                 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
                 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
                 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
                 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
                 ("\e$,1DODf\e(B" "_" ?5) ; symbol
                 ))
      chars syntax category from to pos)
  (dolist (entry deflist)
    (setq chars (car entry)
          syntax (nth 1 entry)
          category (nth 2 entry)
          pos 0)
    (while (< pos (length chars))
      (if (/= (aref chars pos) ?-)
          ;; A plain character stands for itself.
          (setq from (aref chars pos)
                to from)
        ;; "X-Y": X was handled on the previous step, which left FROM
        ;; at X+1, so only the upper bound has to be fetched here.
        (setq pos (1+ pos)
              to (aref chars pos)))
      (while (<= from to)
        (or (string-equal syntax "w")
            (modify-syntax-entry from syntax))
        (modify-category-entry from category)
        (setq from (1+ from)))
      (setq pos (1+ pos)))))
739
4ed46869
KH
740;; Thai character set (TIS620)
741
;; The whole `thai-tis620' charset gets category `t' (Thai) ...
(modify-category-entry (make-char 'thai-tis620) ?t)
;; ... and so does every Unicode code point from U+0E00 through U+0E7F.
(let ((code #xe00))
  (while (<= code #xe7f)
    (modify-category-entry (decode-char 'ucs code) ?t)
    (setq code (1+ code))))
4ed46869
KH
745
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS lists the
;; characters concerned; "X-Y" inside it denotes the inclusive range
;; from X to Y.  Every listed character gets CATEGORY, and also gets
;; SYNTAX unless SYNTAX is "w", in which case no syntax entry is
;; written.
(let ((deflist '(;; chars syntax category
                 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
                 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
                 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
                 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
                 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
                 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
                 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
                 ;; Unicode equivalents
                 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
                 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
                 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
                 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
                 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
                 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
                 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
                 ))
      chars syntax category from to pos)
  (dolist (entry deflist)
    (setq chars (car entry)
          syntax (nth 1 entry)
          category (nth 2 entry)
          pos 0)
    (while (< pos (length chars))
      (if (/= (aref chars pos) ?-)
          ;; A plain character stands for itself.
          (setq from (aref chars pos)
                to from)
        ;; "X-Y": X was handled on the previous step, which left FROM
        ;; at X+1, so only the upper bound has to be fetched here.
        (setq pos (1+ pos)
              to (aref chars pos)))
      (while (<= from to)
        (or (string-equal syntax "w")
            (modify-syntax-entry from syntax))
        (modify-category-entry from category)
        (setq from (1+ from)))
      (setq pos (1+ pos)))))
784
785;; Tibetan character set
786
16230888
KH
;; Both Tibetan charsets get category `q' ...
(dolist (charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char charset) ?q))
;; ... and so does every Unicode code point from U+0F00 through U+0FFF.
(let ((code #xf00))
  (while (<= code #xfff)
    (modify-category-entry (decode-char 'ucs code) ?q)
    (setq code (1+ code))))
9395eb7c
KH
791
;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS lists the
;; characters concerned; "X-Y" inside it denotes the inclusive range
;; from X to Y.  Every listed character gets CATEGORY, and also gets
;; SYNTAX unless SYNTAX is "w", in which case no syntax entry is
;; written.
(let ((deflist '(;; chars syntax category
                 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
                 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
                 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
                 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
                 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
                 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
                 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
                 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
                 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
                 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
                 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
                 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
                 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
                 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others

                 ;; Unicode version (not complete)
                 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
                 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
                 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
                 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
                 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
                 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
                 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
                 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
                 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
                 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
                 ))
      chars syntax category from to pos)
  (dolist (entry deflist)
    (setq chars (car entry)
          syntax (nth 1 entry)
          category (nth 2 entry)
          pos 0)
    (while (< pos (length chars))
      (if (/= (aref chars pos) ?-)
          ;; A plain character stands for itself.
          (setq from (aref chars pos)
                to from)
        ;; "X-Y": X was handled on the previous step, which left FROM
        ;; at X+1, so only the upper bound has to be fetched here.
        (setq pos (1+ pos)
              to (aref chars pos)))
      (while (<= from to)
        (or (string-equal syntax "w")
            (modify-syntax-entry from syntax))
        (modify-category-entry from category)
        (setq from (1+ from)))
      (setq pos (1+ pos)))))
841
842;; Vietnamese character set
843
;; Both VISCII halves get category `v', plus category `l' (Latin) so
;; that they can make a word together with Latin characters.
;; (The word syntax "w" used to be set here as well; that is disabled:
;;   (modify-syntax-entry lower "w")
;;   (modify-syntax-entry upper "w"))
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((generic (make-char charset)))
    (modify-category-entry generic ?v)
    (modify-category-entry generic ?l)))
853
e5dd1155
KH
;; In the standard case table, pair code N of `vietnamese-viscii-upper'
;; with code N of `vietnamese-viscii-lower' for every N in 32..127.
(let ((case-table (standard-case-table)))
  (dotimes (n 96)
    (let ((code (+ n 32)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            case-table))))
861
d05cfa1f
KH
862;; Unicode (mule-unicode-0100-24ff)
863
85ef8ece
KH
864(let ((tbl (standard-case-table)) c)
865
866;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
867;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
868;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
869;; Thus we have to check language-environment to handle casing
870;; correctly. Currently only I<->i is available.
871
85ef8ece
KH
;; Latin Extended-A, Latin Extended-B
  ;; Every code point in U+0100..U+0233 gets category `l' (Latin).
  ;; Case pairs are derived only for even code points:
  ;;   U+0100..U+012E and U+014A..U+0177: upper = C,   lower = C+1
  ;;   U+013A..U+0148:                    upper = C-1, lower = C
  (setq c #x0100)
  (while (<= c #x0233)
    (modify-category-entry (decode-char 'ucs c) ?l)
    (when (zerop (% c 2))
      (cond ((or (<= c #x012e)
                 (and (>= c #x014a) (<= c #x0177)))
             (set-case-syntax-pair
              (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
            ((and (>= c #x013a) (<= c #x0148))
             (set-case-syntax-pair
              (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))))
    (setq c (1+ c)))
fbbde29a
KH
887 ;;(set-downcase-syntax ?\e$,1 P\e(B ?i tbl)
888 ;;(set-upcase-syntax ?I ?\e$,1 Q\e(B tbl)
85ef8ece
KH
889 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
890 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
891 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
e0a65b4d 892 (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl)
d05cfa1f
KH
893 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
894 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
896
269a5dd0
DL
897 ;; Latin Extended-B
898 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
913 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
917 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
918 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
922 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
924 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
925 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
926 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
927 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
928 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
929 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
930 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
931 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
932 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
933 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
947 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
948 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
957 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
958 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
974 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
975 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
976 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
977 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
978 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
979 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
980 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
981 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
982 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
983 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
984 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
985 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
986 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
987 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
988 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
989 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
990 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
991 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
992
85ef8ece
KH
;; Latin Extended Additional
  ;; Every code point in U+1E00..U+1EF9 gets category `l' (Latin).
  ;; Even code points pair with their successor, except inside the gap
  ;; that lies strictly between U+1E94 and U+1EA0.
  (setq c #x1e00)
  (while (<= c #x1ef9)
    (let ((ch (decode-char 'ucs c)))
      (modify-category-entry ch ?l)
      (if (and (zerop (% c 2))
               (not (and (> c #x1e94) (< c #x1ea0))))
          (set-case-syntax-pair ch (decode-char 'ucs (1+ c)) tbl)))
    (setq c (1+ c)))
1002
85ef8ece
KH
;; Greek
  ;; Every code point in U+0370..U+03FF gets category `g' (Greek).
  ;;   U+0391..U+03AB except U+03A2:     lower = C+32
  ;;   even code points U+03DA..U+03EE:  lower = C+1
  (setq c #x0370)
  (while (<= c #x03ff)
    (modify-category-entry (decode-char 'ucs c) ?g)
    (cond ((and (>= c #x0391) (<= c #x03ab) (/= c #x03a2))
           (set-case-syntax-pair
            (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
          ((and (>= c #x03da) (<= c #x03ee) (zerop (% c 2)))
           (set-case-syntax-pair
            (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
    (setq c (1+ c)))
1017 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1020 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1021 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1024
269a5dd0
DL
;; Armenian
  ;; U+0531..U+0556 are paired with the code point #x30 above them.
  (setq c #x531)
  (while (<= c #x556)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ c #x30))))
      (set-case-syntax-pair upper lower tbl))
    (setq c (1+ c)))
1031
85ef8ece
KH
;; Greek Extended
  ;; Every code point in U+1F00..U+1FFF gets category `g' (Greek).
  ;;
  ;; In the columns up to U+1FA7 the lower-case letter sits in the low
  ;; half of each 16-code row (low nibble 0..7) and its upper-case
  ;; partner 8 above it, so those are paired generically.  Skipped:
  ;;  - U+1F50, U+1F52, U+1F54, U+1F56 (listed explicitly below);
  ;;  - the row U+1F70..U+1F77, whose case partners do not sit 8 above
  ;;    and are set by the explicit pairs that follow this loop.
  ;;
  ;; The row test used to read (/= (logand c #x00f0) 7), which is
  ;; always true because masking with #xf0 can never yield 7.  That
  ;; created bogus pairs such as U+1F78 -> U+1F70.  The row nibble has
  ;; to be compared against #x70.
  (setq c #x1f00)
  (while (<= c #x1fff)
    (modify-category-entry (decode-char 'ucs c) ?g)
    (and (<= (logand c #x000f) 7)
         (<= c #x1fa7)
         (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
         (/= (logand c #x00f0) #x70)
         (set-case-syntax-pair
          (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
    (setq c (1+ c)))
1043 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1058 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1059 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1063 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1064 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1065 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1066 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1067
85ef8ece
KH
;; cyrillic
  ;; Every code point in U+0400..U+04FF gets category `y'.
  ;;   U+0400..U+040F:  lower = C+80
  ;;   U+0410..U+042F:  lower = C+32
  ;;   even code points in U+0460..U+0480, U+048C..U+04BE and
  ;;   U+04D0..U+04F4:  lower = C+1
  (setq c #x0400)
  (while (<= c #x04ff)
    (modify-category-entry (decode-char 'ucs c) ?y)
    (cond ((and (>= c #x0400) (<= c #x040f))
           (set-case-syntax-pair
            (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
          ((and (>= c #x0410) (<= c #x042f))
           (set-case-syntax-pair
            (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
          ((and (zerop (% c 2))
                (or (and (>= c #x0460) (<= c #x0480))
                    (and (>= c #x048c) (<= c #x04be))
                    (and (>= c #x04d0) (<= c #x04f4))))
           (set-case-syntax-pair
            (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)))
    (setq c (1+ c)))
1087 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1088 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1089 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1090 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1091 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1092
85ef8ece
KH
;; general punctuation
  ;; Each entry is (FIRST LAST SYNTAX): every code point from FIRST
  ;; through LAST gets SYNTAX in the case table's syntax.
  (dolist (range '((#x2000 #x200b " ")
                   (#x2010 #x2016 "_")
                   ;; Punctuation syntax for quotation marks (like `)
                   (#x2017 #x201f ".")
                   (#x2020 #x2027 "_")))
    (setq c (car range))
    (while (<= c (nth 1 range))
      (set-case-syntax (decode-char 'ucs c) (nth 2 range) tbl)
      (setq c (1+ c))))
d05cfa1f 1109
269a5dd0
DL
;; Roman numerals, Circled Latin, Fullwidth Latin
  ;; Each entry is (FIRST LAST OFFSET LATIN): code points FIRST..LAST
  ;; are upper case, with the lower-case partner OFFSET above.  When
  ;; LATIN is non-nil both partners also get category `l' (Latin).
  (dolist (spec '((#x2160 #x216f #x10 nil) ; Roman numerals
                  (#x24b6 #x24cf 26 t)     ; Circled Latin
                  (#xff21 #xff3a #x20 t))) ; Fullwidth Latin
    (let ((end (nth 1 spec))
          (offset (nth 2 spec))
          (latin (nth 3 spec))
          upper lower)
      (setq c (car spec))
      (while (<= c end)
        (setq upper (decode-char 'ucs c)
              lower (decode-char 'ucs (+ c offset)))
        (set-case-syntax-pair upper lower tbl)
        (when latin
          (modify-category-entry upper ?l)
          (modify-category-entry lower ?l))
        (setq c (1+ c)))))
1134
269a5dd0
DL
;; Combining diacritics (U+0300..U+0362) and combining marks
  ;; (U+20D0..U+20E3): every code point gets category `^'.
  (dolist (range '((#x300 . #x362)
                   (#x20d0 . #x20e3)))
    (setq c (car range))
    (while (<= c (cdr range))
      (modify-category-entry (decode-char 'ucs c) ?^)
      (setq c (1+ c))))
1146
1147 ;; Fixme: syntax for symbols &c
1148 )
d6af0bff
KH
1149
;; Each string holds two characters: an opening delimiter followed by
;; its closing partner.  The first gets open-parenthesis syntax matching
;; the second, and the second gets close-parenthesis syntax matching
;; the first.
(dolist (pair '("\e$,1sEsF\e(B" ; U+2045 U+2046
                "\e$,1s}s~\e(B" ; U+207D U+207E
                "\e$,1t-t.\e(B" ; U+208D U+208E
                "\e$,1{){*\e(B" ; U+2329 U+232A
                "\e$,1|T|U\e(B" ; U+23B4 U+23B5
                "\e$,2&H&I\e(B" ; U+2768 U+2769
                "\e$,2&J&K\e(B" ; U+276A U+276B
                "\e$,2&L&M\e(B" ; U+276C U+276D
                "\e$,2&P&Q\e(B" ; U+2770 U+2771
                "\e$,2&R&S\e(B" ; U+2772 U+2773
                "\e$,2&T&U\e(B" ; U+2774 U+2775
                "\e$,2'f'g\e(B" ; U+27E6 U+27E7
                "\e$,2'h'i\e(B" ; U+27E8 U+27E9
                "\e$,2'j'k\e(B" ; U+27EA U+27EB
                "\e$,2,#,$\e(B" ; U+2983 U+2984
                "\e$,2,%,&\e(B" ; U+2985 U+2986
                "\e$,2,',(\e(B" ; U+2987 U+2988
                "\e$,2,),*\e(B" ; U+2989 U+298A
                "\e$,2,+,,\e(B" ; U+298B U+298C
                "\e$,2,-,.\e(B" ; U+298D U+298E
                "\e$,2,/,0\e(B" ; U+298F U+2990
                "\e$,2,1,2\e(B" ; U+2991 U+2992
                "\e$,2,3,4\e(B" ; U+2993 U+2994
                "\e$,2,5,6\e(B" ; U+2995 U+2996
                "\e$,2,7,8\e(B" ; U+2997 U+2998
                "\e$,2-<-=\e(B" ; U+29FC U+29FD
                "\e$,2=H=I\e(B" ; U+3008 U+3009
                "\e$,2=J=K\e(B" ; U+300A U+300B
                "\e$,2=L=M\e(B" ; U+300C U+300D
                "\e$,2=N=O\e(B" ; U+300E U+300F
                "\e$,2=P=Q\e(B" ; U+3010 U+3011
                "\e$,2=T=U\e(B" ; U+3014 U+3015
                "\e$,2=V=W\e(B" ; U+3016 U+3017
                "\e$,2=X=Y\e(B" ; U+3018 U+3019
                "\e$,2=Z=[\e(B" ; U+301A U+301B
                "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
                "\e$,3pUpV\e(B" ; U+FE35 U+FE36
                "\e$,3pWpX\e(B" ; U+FE37 U+FE38
                "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
                "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
                "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
                "\e$,3p_p`\e(B" ; U+FE3F U+FE40
                "\e$,3papb\e(B" ; U+FE41 U+FE42
                "\e$,3pcpd\e(B" ; U+FE43 U+FE44
                "\e$,3pypz\e(B" ; U+FE59 U+FE5A
                "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
                "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
                "\e$,3rhri\e(B" ; U+FF08 U+FF09
                "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
                "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
                "\e$,3s_s`\e(B" ; U+FF5F U+FF60
                "\e$,3sbsc\e(B" ; U+FF62 U+FF63
                ))
  (let ((open (aref pair 0))
        (close (aref pair 1)))
    (modify-syntax-entry open (string ?\( close))
    (modify-syntax-entry close (string ?\) open))))
1207
4ed46869
KH
1208\f
1209;;; Setting word boundary.
1210
;; Category pairs (BEFORE . AFTER) that combine into, or separate,
;; words.  The separating list concerns 2-byte character sets.
(setq word-combining-categories
      '((?l . ?l))

      word-separating-categories
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
777cfce6
KH
1225
1226\f
1227;; For each character set, put the information of the most proper
aaa9f206 1228;; coding system to encode it by `preferred-coding-system' property.
777cfce6
KH
1229
;; Each element is (CHARSET . CODING-SYSTEM); CODING-SYSTEM is stored
;; as the `preferred-coding-system' property of CHARSET.
(dolist (pair '((latin-iso8859-1 . iso-latin-1)
                (latin-iso8859-2 . iso-latin-2)
                (latin-iso8859-3 . iso-latin-3)
                (latin-iso8859-4 . iso-latin-4)
                (thai-tis620 . thai-tis620)
                (greek-iso8859-7 . greek-iso-8bit)
                (arabic-iso8859-6 . iso-2022-7bit)
                (hebrew-iso8859-8 . hebrew-iso-8bit)
                (katakana-jisx0201 . japanese-shift-jis)
                (latin-jisx0201 . japanese-shift-jis)
                (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                (latin-iso8859-9 . iso-latin-5)
                (japanese-jisx0208-1978 . iso-2022-jp)
                (chinese-gb2312 . cn-gb-2312)
                (japanese-jisx0208 . iso-2022-jp)
                (korean-ksc5601 . iso-2022-kr)
                (japanese-jisx0212 . iso-2022-jp)
                (chinese-cns11643-1 . iso-2022-cn)
                (chinese-cns11643-2 . iso-2022-cn)
                (chinese-big5-1 . chinese-big5)
                (chinese-big5-2 . chinese-big5)
                (chinese-sisheng . iso-2022-7bit)
                (ipa . iso-2022-7bit)
                (vietnamese-viscii-lower . vietnamese-viscii)
                (vietnamese-viscii-upper . vietnamese-viscii)
                (arabic-digit . iso-2022-7bit)
                (arabic-1-column . iso-2022-7bit)
                (ascii-right-to-left . iso-2022-7bit)
                (lao . lao)
                (arabic-2-column . iso-2022-7bit)
                (indian-is13194 . devanagari)
                (indian-glyph . devanagari)
                (tibetan-1-column . tibetan)
                (ethiopic . iso-2022-7bit)
                (chinese-cns11643-3 . iso-2022-cn)
                (chinese-cns11643-4 . iso-2022-cn)
                (chinese-cns11643-5 . iso-2022-cn)
                (chinese-cns11643-6 . iso-2022-cn)
                (chinese-cns11643-7 . iso-2022-cn)
                (indian-2-column . devanagari)
                (tibetan . tibetan)
                (latin-iso8859-14 . iso-latin-8)
                (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car pair) 'preferred-coding-system (cdr pair)))
df0415c5
KH
1276
1277\f
98a663f1 1278;; Setup auto-fill-chars for charsets that should invoke auto-filling.
269a5dd0
DL
1279;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1280;; property on the charsets.
df0415c5
KH
;; For each charset: set its slot in `auto-fill-chars' to t and give
;; the charset a non-nil `nospace-between-words' property.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
777cfce6 1288
ae0916f8
KH
1289\f
1290(setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
1291(makunbound 'saved-utf-translate-cjk-mode)
1292
777cfce6
KH
1293;;; Local Variables:
1294;;; coding: iso-2022-7bit
1295;;; End:
1296
ab5796a9 1297;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
60370d40 1298;;; characters.el ends here