Fix previous change.
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
28636af6 3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
cf6af551 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4ed46869
KH
6
7;; Keywords: multibyte character, character set, syntax, category
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
369314dc
KH
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
4ed46869
KH
25
26;;; Commentary:
27
28;; This file contains multibyte characters. Save this file always in
4b16fa0c 29;; the coding system `iso-2022-7bit'.
4ed46869 30
49adf443
RS
31;; This file does not define the syntax for Latin-N character sets;
32;; those are defined by the files latin-N.el.
33
60370d40
PJ
34;;; Code:
35
ae0916f8
KH
36;; We must set utf-translate-cjk-mode to nil while loading this file
37;; to avoid translating CJK characters in decode-char.
38(defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode)
39(setq utf-translate-cjk-mode nil)
40
4ed46869
KH
41;;; Predefined categories.
42
43;; For each character set.
44
;; One category per script / character set.  Each entry is
;; (MNEMONIC . DESCRIPTION); categories are defined in list order.
(dolist (spec '((?a . "ASCII")
                (?l . "Latin")
                (?t . "Thai")
                (?g . "Greek")
                (?b . "Arabic")
                (?w . "Hebrew")
                (?y . "Cyrillic")
                (?k . "Japanese katakana")
                (?r . "Japanese roman")
                (?c . "Chinese")
                (?j . "Japanese")
                (?h . "Korean")
                (?e . "Ethiopic (Ge'ez)")
                (?v . "Vietnamese")
                (?i . "Indian")
                (?o . "Lao")
                (?q . "Tibetan")))
  (define-category (car spec) (cdr spec)))
4ed46869
KH
62
63;; For each group (row) of 2-byte character sets.
64
;; Categories for groups (rows) of 2-byte character sets, given as
;; (MNEMONIC . DESCRIPTION) and defined in list order.
(dolist (spec '((?A . "Alpha-numeric characters of 2-byte character sets")
                (?C . "Chinese (Han) characters of 2-byte character sets")
                (?G . "Greek characters of 2-byte character sets")
                (?H . "Japanese Hiragana characters of 2-byte character sets")
                (?K . "Japanese Katakana characters of 2-byte character sets")
                (?N . "Korean Hangul characters of 2-byte character sets")
                (?Y . "Cyrillic characters of 2-byte character sets")
                (?I . "Indian Glyphs")))
  (define-category (car spec) (cdr spec)))
73
74;; For phonetic classifications.
75
;; Phonetic classification categories, keyed by the digits 0..9.
;; Each entry is (MNEMONIC . DESCRIPTION); defined in list order.
(dolist (spec '((?0 . "consonant")
                (?1 . "base (independent) vowel")
                (?2 . "upper diacritical mark (including upper vowel)")
                (?3 . "lower diacritical mark (including lower vowel)")
                (?4 . "tone mark")
                (?5 . "symbol")
                (?6 . "digit")
                (?7 . "vowel-modifying diacritical mark")
                (?8 . "vowel-signs")
                (?9 . "semivowel lower")))
  (define-category (car spec) (cdr spec)))
4ed46869
KH
86
87;; For filling.
88(define-category ?| "While filling, we can break a line at this character.")
89
504af7b2 90;; For indentation calculation.
70ea295a 91(define-category ?\s
777cfce6 92 "This character counts as a space for indentation purposes.")
504af7b2 93
94487c4e 94;; Keep the following for `kinsoku' processing. See comments in
4ed46869
KH
95;; kinsoku.el.
96(define-category ?> "A character which can't be placed at beginning of line.")
97(define-category ?< "A character which can't be placed at end of line.")
98
269a5dd0
DL
99;; Combining
100(define-category ?^ "Combining diacritic or mark")
4ed46869
KH
101\f
102;;; Setting syntax and category.
103
104;; ASCII
105
;; Codes 32..126 (printable ASCII) get both the `a' (ASCII) and the
;; `l' (Latin) categories.
(dotimes (offset (- 127 32))
  (let ((ch (+ 32 offset)))
    (modify-category-entry ch ?a)
    (modify-category-entry ch ?l)))
111
112;; Arabic character set
113
;; Put the generic character of every Arabic charset into category `b'.
;; (Setting word syntax here stays disabled, as in the original.)
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (modify-category-entry (make-char charset) ?b))
269a5dd0
DL
;; Unicode code point ranges (inclusive at both ends) whose characters
;; are put into category `b' (Arabic).
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((start (car range)))
    (dotimes (offset (1+ (- (cdr range) start)))
      (modify-category-entry (decode-char 'ucs (+ start offset)) ?b))))
4ed46869
KH
134
135;; Chinese character set (GB2312)
136
269a5dd0 137;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
4ed46869
KH
;; Rows 33, 34 and 41 of GB2312 get symbol syntax.
(dolist (row '(33 34 41))
  (modify-syntax-entry (make-char 'chinese-gb2312 row) "_"))
141(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
142(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
143(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
144(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
145(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
146(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
147(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
840f8f73
KH
148(modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
149(modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
150(modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
4ed46869
KH
151(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
152(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
153(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
154(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
155(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
156(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
157(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
840f8f73
KH
158(modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
159(modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
160(modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
4ed46869 161
840f8f73
KH
162(let ((chars "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B"))
163 (dotimes (i (length chars))
164 (modify-syntax-entry (aref chars i) ".")))
165
4ed46869
KH
;; The charset as a whole: Chinese (`c') and breakable when filling (`|').
(let ((generic (make-char 'chinese-gb2312)))
  (modify-category-entry generic ?c)
  (modify-category-entry generic ?\|))
;; Individual rows, as (ROW . CATEGORY).
(dolist (entry '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car entry))
                         (cdr entry)))
;; Rows 48..126 get category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 offset)) ?C))
177
178;; Chinese character set (BIG5)
179
840f8f73
KH
;; Each entry is (FROM TO SYNTAX) with FROM and TO given as Big5 codes.
;; Characters from FROM up to, but not including, TO receive SYNTAX.
(dolist (spec '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((ch (decode-big5-char (car spec)))
        (limit (decode-big5-char (nth 1 spec)))
        (syntax (nth 2 spec)))
    (while (< ch limit)
      (modify-syntax-entry ch syntax)
      (setq ch (1+ ch)))))
195
196(let ((parens "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
197 open close)
198 (dotimes (i (/ (length parens) 2))
199 (setq open (aref parens (* i 2))
200 close (aref parens (1+ (* i 2))))
201 (modify-syntax-entry open (format "(%c" close))
202 (modify-syntax-entry close (format ")%c" open))))
203
4ed46869
KH
;; Both Big5 charsets get categories `c', `C' and `|', one category at a
;; time across the two generic characters.  (Setting word syntax here
;; stays disabled, as in the original.)
(let ((generics (list (make-char 'chinese-big5-1)
                      (make-char 'chinese-big5-2))))
  (dolist (category '(?c ?C ?\|))
    (dolist (generic generics)
      (modify-category-entry generic category))))
217
218
219;; Chinese character set (CNS11643)
220
;; Every CNS11643 plane gets categories `c', `C' and `|' on its generic
;; character.  (Setting word syntax here stays disabled, as in the
;; original.)
(dolist (charset '(chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7))
  (let ((generic (make-char charset)))
    (dolist (category '(?c ?C ?|))
      (modify-category-entry generic category))))
236
70abfe90
KH
237(let ((parens "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
238 open close)
239 (dotimes (i (/ (length parens) 2))
240 (setq open (aref parens (* i 2))
241 close (aref parens (1+ (* i 2))))
242 (modify-syntax-entry open (format "(%c" close))
243 (modify-syntax-entry close (format ")%c" open))))
244
4ed46869
KH
245;; Cyrillic character set (ISO-8859-5)
246
247(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
248
28636af6 249(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
4ed46869
KH
250(modify-syntax-entry ?\e,L-\e(B ".")
251(modify-syntax-entry ?\e,Lp\e(B ".")
252(modify-syntax-entry ?\e,L}\e(B ".")
28636af6
RS
253(let ((tbl (standard-case-table)))
254 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
255 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
256 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
257 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
258 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
259 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
260 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
261 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
262 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
263 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
264 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
265 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
266 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
267 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
268 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
269 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
270 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
271 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
272 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
273 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
274 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
275 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
276 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
277 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
278 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
279 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
280 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
281 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
282 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
283 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
284 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
285 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
286 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
287 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
288 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
289 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
290 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
291 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
292 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
293 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
294 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
295 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
296 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
297 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
298 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
269a5dd0
DL
299 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
300 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
301 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
303 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
305 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
306 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
307 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
308 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
310 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
314 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
315 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
316 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
317 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
318 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
319 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
320 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
321 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
322 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
323 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
331 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
332 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
4ed46869 346
9395eb7c
KH
347;; Devanagari character set
348
269a5dd0
DL
349;;; Commented out since the categories appear not to be used anywhere
350;;; and word syntax is the default.
351;; (let ((deflist '(;; chars syntax category
352;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
353;; ; chandrabindu, anuswar, visarga
354;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
355;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
356;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
357;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
358;; ;; Unicode equivalents
359;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
360;; ; chandrabindu, anuswar, visarga
361;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
362;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
363;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
364;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
365;; ))
366;; elm chars len syntax category to ch i)
367;; (while deflist
368;; (setq elm (car deflist))
369;; (setq chars (car elm)
370;; len (length chars)
371;; syntax (nth 1 elm)
372;; category (nth 2 elm)
373;; i 0)
374;; (while (< i len)
375;; (if (= (aref chars i) ?-)
376;; (setq i (1+ i)
377;; to (aref chars i))
378;; (setq ch (aref chars i)
379;; to ch))
380;; (while (<= ch to)
381;; (modify-syntax-entry ch syntax)
382;; (modify-category-entry ch category)
383;; (setq ch (1+ ch)))
384;; (setq i (1+ i)))
385;; (setq deflist (cdr deflist))))
9395eb7c 386
4ed46869
KH
387;; Ethiopic character set
388
389(modify-category-entry (make-char 'ethiopic) ?e)
269a5dd0
DL
390;; (modify-syntax-entry (make-char 'ethiopic) "w")
;; Unicode code points #x1200..#x137c (inclusive) get category `e'.
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))
393(let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
394 ;; Unicode equivalents of the above:
395 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B)))
c23d3872
KH
396 (while chars
397 (modify-syntax-entry (car chars) ".")
398 (setq chars (cdr chars))))
4ed46869 399
4ed46869
KH
400;; Greek character set (ISO-8859-7)
401
402(modify-category-entry (make-char 'greek-iso8859-7) ?g)
269a5dd0
DL
;; Unicode code points #x370..#x3ff (inclusive) get category `g'.
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
269a5dd0
DL
407
408;; (let ((c 182))
409;; (while (< c 255)
410;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
411;; (setq c (1+ c))))
412;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
4ed46869
KH
413(modify-syntax-entry ?\e,F7\e(B ".")
414(modify-syntax-entry ?\e,F;\e(B ".")
415(modify-syntax-entry ?\e,F=\e(B ".")
4b7c7a40 416(let ((tbl (standard-case-table)))
269a5dd0
DL
417 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
418 ;; in several cases.
419 (set-case-syntax ?\e,F!\e(B "." tbl)
420 (set-case-syntax ?\e,F"\e(B "." tbl)
;; NOTE(review): this character used to be given "." and then, on the
;; very next line, "_".  Only the later call took effect, so the dead
;; first call is dropped.  Confirm "_" is the intended syntax, or whether
;; the "." entry was meant for a different character.
  (set-case-syntax ?\e,F&\e(B "_" tbl)
423 (set-case-syntax ?\e,F'\e(B "." tbl)
424 (set-case-syntax ?\e,F)\e(B "_" tbl)
425 (set-case-syntax ?\e,F+\e(B "." tbl)
426 (set-case-syntax ?\e,F,\e(B "_" tbl)
427 (set-case-syntax ?\e,F-\e(B "_" tbl)
428 (set-case-syntax ?\e,F/\e(B "." tbl)
429 (set-case-syntax ?\e,F0\e(B "_" tbl)
430 (set-case-syntax ?\e,F1\e(B "_" tbl)
431;; (set-case-syntax ?\e,F7\e(B "_" tbl)
432;; (set-case-syntax ?\e,F=\e(B "_" tbl)
4b7c7a40
DL
433 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
434 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
435 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
436 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
437 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
438 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
439 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
440 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
441 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
442 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
443 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
444 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
445 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
446 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
447 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
448 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
449 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
450 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
451 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
452 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
453 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
454 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
455 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
456 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
457 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
458 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
459 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
460 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
461 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
462 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
463 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
464 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
269a5dd0
DL
465 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
466 ;; Unicode equivalents
467 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
468 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
469 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
470 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
471 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
472 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
473 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
474 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
475 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
476 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
477 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
478 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
479 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
480 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
481 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
482 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
483 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
484 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
485 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
486 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
487 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
488 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
489 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
490 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
491 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
492 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
493 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
494 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
495 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
496 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
4ed46869
KH
500
501;; Hebrew character set (ISO-8859-8)
502
227f528e
EZ
;; Category `w' (Hebrew) for the ISO-8859-8 charset and for Unicode
;; code points #x591..#x5f4 (inclusive).
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))
508
;; Punctuation syntax for Hebrew marks.
;; ISO-8859-8 codes (labelled PASEQ and SOF PASUQ in the original):
(dolist (code '(208 211))
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))
;; Unicode: MAQAF, PASEQ, SOF PASUQ, GERESH, GERSHAYIM.
(dolist (code '(#x5be #x5c0 #x5c3 #x5f3 #x5f4))
  (modify-syntax-entry (decode-char 'ucs code) "."))
4ed46869 516
269a5dd0
DL
517;; (let ((c 224))
518;; (while (< c 251)
519;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
520;; (setq c (1+ c))))
521;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
4ed46869
KH
522
523;; Indian character set (IS 13194 and other Emacs original Indian charsets)
524
;; Generic characters of the Indian charsets, as (CHARSET . CATEGORY).
(dolist (entry '((indian-is13194 . ?i)
                 (indian-2-column . ?I)
                 (indian-glyph . ?I)))
  (modify-category-entry (make-char (car entry)) (cdr entry)))
;; Unicode Devanagari block: #x901..#x970 (inclusive) get category `i'.
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
4ed46869 533
ed459fb6
KH
;; Assign phonetic categories to each Indic block.  RANGES holds
;; (FIRST LAST CATEGORY) offsets relative to a block base; the same
;; table is applied to every base address in BASES.
(let ((ranges '((#x01 #x03 ?7)          ; vowel modifier
                (#x05 #x14 ?1)          ; base vowel
                (#x15 #x39 ?0)          ; consonants
                (#x3e #x4d ?8)          ; vowel modifier
                (#x51 #x54 ?4)          ; stress/tone mark
                (#x58 #x5f ?0)          ; consonants
                (#x60 #x61 ?1)          ; base vowel
                (#x62 #x63 ?8)          ; vowel modifier
                (#x66 #x6f ?6)))        ; digits
      (bases '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00)))
  (dolist (base bases)
    (dolist (range ranges)
      (let ((code (+ base (car range)))
            (last (+ base (nth 1 range)))
            (category (nth 2 range)))
        (while (<= code last)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
4a027a0d 553
4ed46869
KH
554;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
555
;; Categories for the generic character of each Japanese charset, as
;; (CHARSET . CATEGORY), applied in list order.
(dolist (entry '((katakana-jisx0201 . ?k)
                 (katakana-jisx0201 . ?j)
                 (latin-jisx0201 . ?r)
                 (japanese-jisx0208 . ?j)
                 (japanese-jisx0212 . ?j)
                 (katakana-jisx0201 . ?\|)
                 (japanese-jisx0208 . ?\|)
                 (japanese-jisx0212 . ?\|)))
  (modify-category-entry (make-char (car entry)) (cdr entry)))
4ed46869 564
269a5dd0
DL
565;; Unicode equivalents of JISX0201-kana
;; Unicode code points #xff61..#xff9f (inclusive) get categories
;; `k', `j' and `|'.
(dotimes (offset (1+ (- #xff9f #xff61)))
  (let ((ch (decode-char 'ucs (+ #xff61 offset))))
    (dolist (category '(?k ?j ?\|))
      (modify-category-entry ch category))))
572
573;; Katakana block
;; Unicode code points #x30a0..#x30ff (inclusive).
;; ?K is double width, ?k isn't specified, so `k' is used here.
(dotimes (offset (1+ (- #x30ff #x30a0)))
  (let ((ch (decode-char 'ucs (+ #x30a0 offset))))
    (dolist (category '(?k ?j ?\|))
      (modify-category-entry ch category))))
581
582;; Hiragana block
;; Unicode code points #x3040..#x309f (inclusive) get categories `H'
;; and `|'.  ?H is actually defined to be double width.  Category `j'
;; is deliberately not assigned here (it was disabled in the original).
(dotimes (offset (1+ (- #x309f #x3040)))
  (let ((ch (decode-char 'ucs (+ #x3040 offset))))
    (modify-category-entry ch ?H)
    (modify-category-entry ch ?\|)))
590
4ed46869 591;; JISX0208
269a5dd0 592;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
4ed46869
KH
;; Rows 33, 34 and 40 of JISX0208 get symbol syntax.
(dolist (row '(33 34 40))
  (modify-syntax-entry (make-char 'japanese-jisx0208 row) "_"))
cf6af551
MB
596(let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
597 (while chars
598 (modify-syntax-entry (car chars) "w")
599 (setq chars (cdr chars))))
70abfe90
KH
600(let ((parens "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
601 open close)
602 (dotimes (i (/ (length parens) 2))
603 (setq open (aref parens (* i 2))
604 close (aref parens (1+ (* i 2))))
605 (modify-syntax-entry open (format "(%c" close))
606 (modify-syntax-entry close (format ")%c" open))))
4ed46869
KH
607
;; Individual JISX0208 rows, as (ROW . CATEGORY).
(dolist (entry '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car entry))
                         (cdr entry)))
;; Rows 48..126 get category `C'.
(dotimes (offset (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 offset)) ?C))
210dede6
KH
617(modify-category-entry ?\e$B!<\e(B ?K)
618(let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
4ed46869
KH
619 (while chars
620 (modify-category-entry (car chars) ?K)
621 (modify-category-entry (car chars) ?H)
622 (setq chars (cdr chars))))
623(let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
624 (while chars
625 (modify-category-entry (car chars) ?C)
626 (setq chars (cdr chars))))
627
628;; JISX0212
269a5dd0 629;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
4ed46869
KH
;; Rows 33, 34 and 35 of JISX0212 get symbol syntax.
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))
633
634(modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
635
636;; JISX0201-Kana
269a5dd0
DL
637;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
638(let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
639 ;; Unicode:
640 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B)))
4ed46869
KH
641 (while chars
642 (modify-syntax-entry (car chars) ".")
643 (setq chars (cdr chars))))
644
226e4119
KH
;; JISX0201-Kana corner brackets form a matched pair: the first is an
;; opening delimiter whose partner is the second, and the second is the
;; closing delimiter whose partner is the first.  The closing bracket
;; must use the close-paren class ")"; it previously used "(", which
;; made both brackets opening delimiters.
(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
(modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B")
647
4ed46869
KH
648;; Korean character set (KSC5601)
649
269a5dd0 650;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
4ed46869
KH
;; Rows of KSC5601 that get symbol syntax.
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; The charset as a whole is Korean (`h'); selected rows get an
;; additional category, listed as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (entry '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car entry))
                         (cdr entry)))
664
013d402e 665(let ((parens "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" )
70abfe90
KH
666 open close)
667 (dotimes (i (/ (length parens) 2))
668 (setq open (aref parens (* i 2))
669 close (aref parens (1+ (* i 2))))
670 (modify-syntax-entry open (format "(%c" close))
671 (modify-syntax-entry close (format ")%c" open))))
672
d05cfa1f
KH
673;; Latin character set (latin-1,2,3,4,5,8,9)
674
;; Every Latin charset gets category `l' on its generic character, and
;; its code 160 is then put into the space category.
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15)))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset) ?l))
  (dolist (charset latin-charsets)
    (modify-category-entry (make-char charset 160) ?\ )))
690
6eba8645
KH
691;; Lao character set
692
693(modify-category-entry (make-char 'lao) ?o)
269a5dd0
DL
;; Unicode code points #xe80..#xeff (inclusive) get category `o'.
(let ((code #xe80))
  (while (<= code #xeff)
    (modify-category-entry (decode-char 'ucs code) ?o)
    (setq code (1+ code))))
6eba8645
KH
696
;; Assign syntax and category codes to individual Lao characters.
;; Each element of DEFLIST is (CHARS SYNTAX CATEGORY).  CHARS is a
;; string listing the characters concerned; a `-' between two
;; characters stands for the inclusive range from the first to the
;; second.  The first group of rows uses the `lao' charset, the second
;; group the corresponding Unicode characters.
;; NOTE(review): the `\e' sequences in the strings look like this
;; listing's rendering of the ESC bytes of iso-2022-7bit -- confirm
;; against the real file before editing any string.
;; NOTE(review): the lao-charset "tone mark" range ends at `l' while
;; the Unicode one ends at `k', and the "semivowel lower" rows list one
;; and two characters respectively -- confirm whether that is intended.
697(let ((deflist '(;; chars syntax category
698 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
699 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
700 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
701 ("\e(1XY\e(B" "w" ?3) ; vowel lower
a1506d29 702 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
6eba8645 703 ("\e(1\\e(B" "w" ?9) ; semivowel lower
9765a2ba 704 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
d5f05a67 705 ("\e(1Of\e(B" "_" ?5) ; symbol
269a5dd0
DL
706 ;; Unicode equivalents
707 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
708 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
709 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
710 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
a1506d29 711 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
269a5dd0
DL
712 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
713 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
714 ("\e$,1DODf\e(B" "_" ?5) ; symbol
6eba8645
KH
715 ))
716 elm chars len syntax category to ch i)
;; Process one DEFLIST element per iteration.
717 (while deflist
718 (setq elm (car deflist))
719 (setq chars (car elm)
720 len (length chars)
721 syntax (nth 1 elm)
722 category (nth 2 elm)
723 i 0)
;; Scan CHARS.  A plain character sets CH and TO to itself.  A `-'
;; takes the following character as TO and leaves CH untouched; CH is
;; then one past the previously processed character, so the inner
;; loop covers the rest of the range.
724 (while (< i len)
725 (if (= (aref chars i) ?-)
726 (setq i (1+ i)
4a027a0d
KH
727 to (aref chars i))
728 (setq ch (aref chars i)
6eba8645
KH
729 to ch))
730 (while (<= ch to)
269a5dd0
DL
;; Rows whose syntax is "w" only receive a category; the syntax
;; table is modified for other syntax codes only.
731 (unless (string-equal syntax "w")
732 (modify-syntax-entry ch syntax))
6eba8645
KH
733 (modify-category-entry ch category)
734 (setq ch (1+ ch)))
4a027a0d 735 (setq i (1+ i)))
6eba8645
KH
736 (setq deflist (cdr deflist))))
737
4ed46869
KH
738;; Thai character set (TIS620)
739
;; Put the `thai-tis620' charset, and every UCS code point from U+0E00
;; through U+0E7F, in category `t' (Thai).
(modify-category-entry (make-char 'thai-tis620) ?t)
(let ((code #xe00))
  (while (<= code #xe7f)
    (modify-category-entry (decode-char 'ucs code) ?t)
    (setq code (1+ code))))
4ed46869
KH
743
;; Assign syntax and category codes to individual Thai characters.
;; Each element of DEFLIST is (CHARS SYNTAX CATEGORY).  CHARS is a
;; string listing the characters concerned; a `-' between two
;; characters stands for the inclusive range from the first to the
;; second.  The first group of rows uses the TIS620 charset, the
;; second group the corresponding Unicode characters.
;; NOTE(review): the `\e' sequences in the strings look like this
;; listing's rendering of the ESC bytes of iso-2022-7bit -- confirm
;; against the real file before editing any string.
744(let ((deflist '(;; chars syntax category
745 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
746 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
747 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
748 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
a1506d29 749 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
9765a2ba
KH
750 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
751 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
269a5dd0
DL
752 ;; Unicode equivalents
753 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
754 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
755 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
756 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
a1506d29 757 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
269a5dd0
DL
758 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
759 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
4ed46869
KH
760 ))
761 elm chars len syntax category to ch i)
9395eb7c
KH
;; Process one DEFLIST element per iteration.
762 (while deflist
763 (setq elm (car deflist))
764 (setq chars (car elm)
765 len (length chars)
766 syntax (nth 1 elm)
767 category (nth 2 elm)
768 i 0)
;; Scan CHARS.  A plain character sets CH and TO to itself.  A `-'
;; takes the following character as TO and leaves CH untouched; CH is
;; then one past the previously processed character, so the inner
;; loop covers the rest of the range.
769 (while (< i len)
770 (if (= (aref chars i) ?-)
771 (setq i (1+ i)
4a027a0d
KH
772 to (aref chars i))
773 (setq ch (aref chars i)
9395eb7c
KH
774 to ch))
775 (while (<= ch to)
269a5dd0
DL
;; Rows whose syntax is "w" only receive a category; the syntax
;; table is modified for other syntax codes only.
776 (unless (string-equal syntax "w")
777 (modify-syntax-entry ch syntax))
9395eb7c
KH
778 (modify-category-entry ch category)
779 (setq ch (1+ ch)))
4a027a0d 780 (setq i (1+ i)))
9395eb7c
KH
781 (setq deflist (cdr deflist))))
782
783;; Tibetan character set
784
16230888
KH
;; Put both Tibetan charsets, and every UCS code point from U+0F00
;; through U+0FFF, in category `q'.
(dolist (charset '(tibetan tibetan-1-column))
  (modify-category-entry (make-char charset) ?q))
(let ((code #xf00))
  (while (<= code #xfff)
    (modify-category-entry (decode-char 'ucs code) ?q)
    (setq code (1+ code))))
9395eb7c
KH
789
;; Assign syntax and category codes to individual Tibetan characters.
;; Each element of DEFLIST is (CHARS SYNTAX CATEGORY).  CHARS is a
;; string listing the characters concerned; a `-' between two
;; characters stands for the inclusive range from the first to the
;; second.  Rows with an empty trailing comment continue the class
;; named on the nearest commented row above them.
;; NOTE(review): the `\e' sequences (and the bare quote characters
;; inside the strings) look like this listing's rendering of
;; iso-2022-7bit bytes -- confirm against the real file before editing
;; any string.
790(let ((deflist '(;; chars syntax category
269a5dd0 791 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
e6f02372 792 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
9395eb7c
KH
793 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
794 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
795 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
796 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
e6f02372 797 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
9395eb7c
KH
798 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
799 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
800 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
801 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
802 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
803 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
e6f02372 804 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
269a5dd0
DL
805
806 ;; Unicode version (not complete)
807 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
808 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
809 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
810 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
811 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
812 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
813 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
814 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
815 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
816 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
9395eb7c
KH
817 ))
818 elm chars len syntax category to ch i)
4ed46869
KH
;; Process one DEFLIST element per iteration.
819 (while deflist
820 (setq elm (car deflist))
821 (setq chars (car elm)
822 len (length chars)
823 syntax (nth 1 elm)
824 category (nth 2 elm)
825 i 0)
;; Scan CHARS.  A plain character sets CH and TO to itself.  A `-'
;; takes the following character as TO and leaves CH untouched; CH is
;; then one past the previously processed character, so the inner
;; loop covers the rest of the range.
826 (while (< i len)
827 (if (= (aref chars i) ?-)
828 (setq i (1+ i)
4a027a0d
KH
829 to (aref chars i))
830 (setq ch (aref chars i)
4ed46869
KH
831 to ch))
832 (while (<= ch to)
269a5dd0
DL
;; Rows whose syntax is "w" only receive a category; the syntax
;; table is modified for other syntax codes only.
833 (unless (string-equal syntax "w")
834 (modify-syntax-entry ch syntax))
4ed46869
KH
835 (modify-category-entry ch category)
836 (setq ch (1+ ch)))
4a027a0d 837 (setq i (1+ i)))
4ed46869
KH
838 (setq deflist (cdr deflist))))
839
840;; Vietnamese character set
841
;; Put both VISCII charsets in category `v' and also in category `l'
;; (Latin), the latter so that they can make a word together with
;; Latin characters.  Word syntax is deliberately not set here.
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((ch (make-char charset)))
    (modify-category-entry ch ?v)
    (modify-category-entry ch ?l)))
851
e5dd1155
KH
;; In the standard case table, pair code N of `vietnamese-viscii-upper'
;; (capital) with code N of `vietnamese-viscii-lower' (small letter)
;; for every N from 32 through 127.
(let ((case-table (standard-case-table)))
  (dotimes (offset (- 128 32))
    (let ((code (+ 32 offset)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            case-table))))
859
d05cfa1f
KH
860;; Unicode (mule-unicode-0100-24ff)
861
85ef8ece
KH
862(let ((tbl (standard-case-table)) c)
863
864;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
865;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
866;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
867;; Thus we have to check language-environment to handle casing
868;; correctly. Currently only I<->i is available.
869
85ef8ece
KH
;; Latin Extended-A, Latin Extended-B (U+0100..U+0233).
  ;; Every code point in the range gets category `l'.  Case pairs are
  ;; set only at even code points:
  ;;   - up to U+012E and in U+014A..U+0177 the even code is the
  ;;     capital and the next code its small letter;
  ;;   - in U+013A..U+0148 the even code is the small letter and the
  ;;     preceding (odd) code its capital.
  (setq c #x0100)
  (while (<= c #x0233)
    (let ((ch (decode-char 'ucs c)))
      (modify-category-entry ch ?l)
      (when (= (% c 2) 0)
        (cond ((or (<= c #x012e)
                   (and (>= c #x014a) (<= c #x0177)))
               (set-case-syntax-pair ch (decode-char 'ucs (1+ c)) tbl))
              ((and (>= c #x013a) (<= c #x0148))
               (set-case-syntax-pair (decode-char 'ucs (1- c)) ch tbl)))))
    (setq c (1+ c)))
85ef8ece
KH
885 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
886 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
887 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
8325c01e 888;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
d05cfa1f
KH
889 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
892
269a5dd0
DL
893 ;; Latin Extended-B
894 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
913 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
917 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
918 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
922 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
924 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
925 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
926 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
927 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
928 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
929 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
930 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
943 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
944 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
953 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
954 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
974 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
975 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
976 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
977 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
978 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
979 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
980 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
981 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
982 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
983 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
984 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
985 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
986 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
987 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
988
85ef8ece
KH
;; Latin Extended Additional (U+1E00..U+1EF9).
  ;; Every code point gets category `l'.  Outside U+1E95..U+1E9F each
  ;; even code point is paired as the capital of the code that follows.
  (setq c #x1e00)
  (while (<= c #x1ef9)
    (let ((ch (decode-char 'ucs c)))
      (modify-category-entry ch ?l)
      (when (and (= (% c 2) 0)
                 (or (<= c #x1e94) (>= c #x1ea0)))
        (set-case-syntax-pair ch (decode-char 'ucs (1+ c)) tbl)))
    (setq c (1+ c)))
998
85ef8ece
KH
;; Greek (U+0370..U+03FF).
  ;; Every code point gets category `g' (Greek).  Case pairs:
  ;;   - U+0391..U+03A1 and U+03A3..U+03AB: small letter is 32 above;
  ;;   - even codes in U+03DA..U+03EE: small letter is the next code.
  (setq c #x0370)
  (while (<= c #x03ff)
    (let ((ch (decode-char 'ucs c)))
      (modify-category-entry ch ?g)
      (cond ((or (and (>= c #x0391) (<= c #x03a1))
                 (and (>= c #x03a3) (<= c #x03ab)))
             (set-case-syntax-pair ch (decode-char 'ucs (+ c 32)) tbl))
            ((and (>= c #x03da) (<= c #x03ee) (= (% c 2) 0))
             (set-case-syntax-pair ch (decode-char 'ucs (1+ c)) tbl))))
    (setq c (1+ c)))
1013 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1014 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1015 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1020
269a5dd0
DL
;; Armenian: pair each of U+0531..U+0556 (capital) with the code
  ;; point #x30 above it (small letter).
  (setq c #x531)
  (while (<= c #x556)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ c #x30))))
      (set-case-syntax-pair upper lower tbl))
    (setq c (1+ c)))
1027
85ef8ece
KH
;; Greek Extended (U+1F00..U+1FFF).
  ;; Every code point gets category `g' (Greek).  For the rows up to
  ;; U+1FA7 the small letters occupy the low half of each 16-code row
  ;; (low nibble 0..7) and the matching capital is 8 above, so the pair
  ;; is (c+8 . c).  Exceptions:
  ;;   - U+1F50, U+1F52, U+1F54 and U+1F56 are skipped explicitly;
  ;;   - the whole U+1F7x row is skipped: its letters are paired
  ;;     individually by the explicit `set-case-syntax-pair' calls that
  ;;     follow this loop, not with the code 8 above.
  ;; The row test must compare the masked value with #x70.  It used to
  ;; compare with 7, which a multiple of 16 never equals, so the U+1F7x
  ;; row was not excluded and U+1F78..U+1F7F were wrongly registered as
  ;; capitals of U+1F70..U+1F77.
  (setq c #x1f00)
  (while (<= c #x1fff)
    (modify-category-entry (decode-char 'ucs c) ?g)
    (and (<= (logand c #x000f) 7)
         (<= c #x1fa7)
         (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
         (/= (logand c #x00f0) #x70)
         (set-case-syntax-pair
          (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
    (setq c (1+ c)))
1039 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1058 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1059 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1063
85ef8ece
KH
;; Cyrillic (U+0400..U+04FF).
  ;; Every code point gets category `y'.  Case pairs:
  ;;   - U+0400..U+040F: small letter is 80 above;
  ;;   - U+0410..U+042F: small letter is 32 above;
  ;;   - even codes in U+0460..U+0480, U+048C..U+04BE and
  ;;     U+04D0..U+04F4: small letter is the next code.
  (setq c #x0400)
  (while (<= c #x04ff)
    (let ((ch (decode-char 'ucs c)))
      (modify-category-entry ch ?y)
      ;; C never drops below #x0400 inside this loop, so the first two
      ;; clauses only need an upper bound.
      (cond ((<= c #x040f)
             (set-case-syntax-pair ch (decode-char 'ucs (+ c 80)) tbl))
            ((<= c #x042f)
             (set-case-syntax-pair ch (decode-char 'ucs (+ c 32)) tbl))
            ((and (= (% c 2) 0)
                  (or (and (>= c #x0460) (<= c #x0480))
                      (and (>= c #x048c) (<= c #x04be))
                      (and (>= c #x04d0) (<= c #x04f4))))
             (set-case-syntax-pair ch (decode-char 'ucs (1+ c)) tbl))))
    (setq c (1+ c)))
1083 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1084 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1085 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1086 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1087 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1088
85ef8ece
KH
;; General punctuation.  Each entry is (FIRST LAST SYNTAX): every code
  ;; point from FIRST through LAST gets SYNTAX in the case table's
  ;; syntax.  U+2017..U+201F get punctuation syntax because they are
  ;; quotation marks (like `).
  (dolist (range '((#x2000 #x200b " ")
                   (#x2010 #x2016 "_")
                   (#x2017 #x201f ".")
                   (#x2020 #x2027 "_")))
    (setq c (car range))
    (while (<= c (nth 1 range))
      (set-case-syntax (decode-char 'ucs c) (nth 2 range) tbl)
      (setq c (1+ c))))
d05cfa1f 1105
269a5dd0
DL
;; Roman numerals: U+2160..U+216F pair with the code #x10 above.
  (setq c #x2160)
  (while (<= c #x216f)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ c #x10))))
      (set-case-syntax-pair upper lower tbl))
    (setq c (1+ c)))

  ;; Circled Latin: U+24B6..U+24CF pair with the code 26 above; both
  ;; members of each pair get category `l'.
  (setq c #x24b6)
  (while (<= c #x24cf)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ c 26))))
      (set-case-syntax-pair upper lower tbl)
      (modify-category-entry upper ?l)
      (modify-category-entry lower ?l))
    (setq c (1+ c)))

  ;; Fullwidth Latin: U+FF21..U+FF3A pair with the code #x20 above;
  ;; both members of each pair get category `l'.
  (setq c #xff21)
  (while (<= c #xff3a)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ c #x20))))
      (set-case-syntax-pair upper lower tbl)
      (modify-category-entry upper ?l)
      (modify-category-entry lower ?l))
    (setq c (1+ c)))

  ;; Combining diacritics (U+0300..U+0362) and combining marks
  ;; (U+20D0..U+20E3) get category `^'.
  (dolist (range '((#x300 . #x362) (#x20d0 . #x20e3)))
    (setq c (car range))
    (while (<= c (cdr range))
      (modify-category-entry (decode-char 'ucs c) ?^)
      (setq c (1+ c))))
1142
1143 ;; Fixme: syntax for symbols &c
1144 )
d6af0bff
KH
1145
;; Give paired delimiters open/close parenthesis syntax.
;; Each element of PAIRS is a string whose first character is the
;; opening delimiter and whose second character is the closing one;
;; the trailing comment on each row names the two code points.
;; NOTE(review): the `\e' sequences look like this listing's rendering
;; of the ESC bytes of iso-2022-7bit -- confirm against the real file
;; before editing any string.
1146(let ((pairs
1147 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1148 "\e$,1s}s~\e(B" ; U+207D U+207E
1149 "\e$,1t-t.\e(B" ; U+208D U+208E
d6af0bff
KH
1150 "\e$,1{){*\e(B" ; U+2329 U+232A
1151 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1152 "\e$,2&H&I\e(B" ; U+2768 U+2769
1153 "\e$,2&J&K\e(B" ; U+276A U+276B
1154 "\e$,2&L&M\e(B" ; U+276C U+276D
1155 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1156 "\e$,2&R&S\e(B" ; U+2772 U+2773
1157 "\e$,2&T&U\e(B" ; U+2774 U+2775
1158 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1159 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1160 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1161 "\e$,2,#,$\e(B" ; U+2983 U+2984
1162 "\e$,2,%,&\e(B" ; U+2985 U+2986
1163 "\e$,2,',(\e(B" ; U+2987 U+2988
1164 "\e$,2,),*\e(B" ; U+2989 U+298A
1165 "\e$,2,+,,\e(B" ; U+298B U+298C
1166 "\e$,2,-,.\e(B" ; U+298D U+298E
1167 "\e$,2,/,0\e(B" ; U+298F U+2990
1168 "\e$,2,1,2\e(B" ; U+2991 U+2992
1169 "\e$,2,3,4\e(B" ; U+2993 U+2994
1170 "\e$,2,5,6\e(B" ; U+2995 U+2996
1171 "\e$,2,7,8\e(B" ; U+2997 U+2998
1172 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1173 "\e$,2=H=I\e(B" ; U+3008 U+3009
1174 "\e$,2=J=K\e(B" ; U+300A U+300B
1175 "\e$,2=L=M\e(B" ; U+300C U+300D
1176 "\e$,2=N=O\e(B" ; U+300E U+300F
1177 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1178 "\e$,2=T=U\e(B" ; U+3014 U+3015
1179 "\e$,2=V=W\e(B" ; U+3016 U+3017
1180 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1181 "\e$,2=Z=[\e(B" ; U+301A U+301B
1182 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1183 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1184 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1185 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1186 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1187 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1188 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1189 "\e$,3papb\e(B" ; U+FE41 U+FE42
1190 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1191 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1192 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1193 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1194 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1195 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1196 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1197 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1198 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1199 )))
;; For each pair, the first character gets syntax "(" with the second
;; as its matching character, and the second gets syntax ")" with the
;; first as its matching character.
1200 (dolist (elt pairs)
1201 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
1202 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
1203
4ed46869
KH
1204\f
1205;;; Setting word boundary.
1206
;; Characters of category `l' combine with each other into one word.
(setq word-combining-categories
      (list (cons ?l ?l)))

;; Category pairs between which a word boundary is placed
;; (2-byte character sets).
(setq word-separating-categories
      (list (cons ?A ?K)                ; Alpha numeric - Katakana
            (cons ?A ?C)                ; Alpha numeric - Chinese
            (cons ?H ?A)                ; Hiragana - Alpha numeric
            (cons ?H ?K)                ; Hiragana - Katakana
            (cons ?H ?C)                ; Hiragana - Chinese
            (cons ?K ?A)                ; Katakana - Alpha numeric
            (cons ?K ?C)                ; Katakana - Chinese
            (cons ?C ?A)                ; Chinese - Alpha numeric
            (cons ?C ?K)))              ; Chinese - Katakana
777cfce6
KH
1221
1222\f
1223;; For each character set, record the coding system best suited to
aaa9f206 1224;; encoding it in the charset's `preferred-coding-system' property.
777cfce6
KH
1225
;; Each entry is (CHARSET . CODING-SYSTEM); CODING-SYSTEM is stored as
;; the `preferred-coding-system' property of CHARSET.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
df0415c5
KH
1272
1273\f
98a663f1 1274;; Setup auto-fill-chars for charsets that should invoke auto-filling.
269a5dd0
DL
1275;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1276;; property on the charsets.
df0415c5
KH
;; For each listed charset, mark the character `make-char' returns for
;; it in `auto-fill-chars' and set the charset's
;; `nospace-between-words' property.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
777cfce6 1284
ae0916f8
KH
1285\f
;; Restore `utf-translate-cjk-mode' to the value saved at the top of
;; this file (where it was forced to nil for the duration of the load),
;; then discard the temporary variable.  The restore must come first:
;; `makunbound' leaves the saved value unreadable.
1286(setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
1287(makunbound 'saved-utf-translate-cjk-mode)
1288
777cfce6
KH
1289;;; Local Variables:
1290;;; coding: iso-2022-7bit
1291;;; End:
1292
ab5796a9 1293;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
60370d40 1294;;; characters.el ends here