(latin1-display-setup): Check each character is displayable or not
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
28636af6 3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
cf6af551 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4ed46869
KH
6
7;; Keywords: multibyte character, character set, syntax, category
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
369314dc
KH
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
4ed46869
KH
25
26;;; Commentary:
27
28;; This file contains multibyte characters. Save this file always in
4b16fa0c 29;; the coding system `iso-2022-7bit'.
4ed46869 30
49adf443
RS
31;; This file does not define the syntax for Latin-N character sets;
32;; those are defined by the files latin-N.el.
33
60370d40
PJ
34;;; Code:
35
4ed46869
KH
36;;; Predefined categories.
37
38;; For each character set.
39
;; One category per character set / script.  Each entry is
;; (MNEMONIC . DOCSTRING); entries are registered in the order listed.
(let ((script-categories
       '((?a . "ASCII")
         (?l . "Latin")
         (?t . "Thai")
         (?g . "Greek")
         (?b . "Arabic")
         (?w . "Hebrew")
         (?y . "Cyrillic")
         (?k . "Japanese katakana")
         (?r . "Japanese roman")
         (?c . "Chinese")
         (?j . "Japanese")
         (?h . "Korean")
         (?e . "Ethiopic (Ge'ez)")
         (?v . "Vietnamese")
         (?i . "Indian")
         (?o . "Lao")
         (?q . "Tibetan"))))
  (dolist (entry script-categories)
    (define-category (car entry) (cdr entry))))
4ed46869
KH
57
58;; For each group (row) of 2-byte character sets.
59
;; Categories naming a group (row) inside the 2-byte character sets,
;; plus the Indian glyph category.  Entries are (MNEMONIC . DOCSTRING).
(let ((row-categories
       '((?A . "Alpha-numeric characters of 2-byte character sets")
         (?C . "Chinese (Han) characters of 2-byte character sets")
         (?G . "Greek characters of 2-byte character sets")
         (?H . "Japanese Hiragana characters of 2-byte character sets")
         (?K . "Japanese Katakana characters of 2-byte character sets")
         (?N . "Korean Hangul characters of 2-byte character sets")
         (?Y . "Cyrillic characters of 2-byte character sets")
         (?I . "Indian Glyphs"))))
  (dolist (entry row-categories)
    (define-category (car entry) (cdr entry))))
68
69;; For phonetic classifications.
70
;; Phonetic classes, keyed by the digits ?0 .. ?9.
(let ((phonetic-categories
       '((?0 . "consonant")
         (?1 . "base (independent) vowel")
         (?2 . "upper diacritical mark (including upper vowel)")
         (?3 . "lower diacritical mark (including lower vowel)")
         (?4 . "tone mark")
         (?5 . "symbol")
         (?6 . "digit")
         (?7 . "vowel-modifying diacritical mark")
         (?8 . "vowel-signs")
         (?9 . "semivowel lower"))))
  (dolist (entry phonetic-categories)
    (define-category (car entry) (cdr entry))))
4ed46869
KH
81
82;; For filling.
(let ((layout-categories
       '(;; Line filling.
         (?| . "While filling, we can break a line at this character.")
         ;; For indentation calculation.
         (?\s . "This character counts as a space for indentation purposes.")
         ;; Keep the following for `kinsoku' processing.  See comments
         ;; in kinsoku.el.
         (?> . "A character which can't be placed at beginning of line.")
         (?< . "A character which can't be placed at end of line.")
         ;; Combining
         (?^ . "Combining diacritic or mark"))))
  (dolist (entry layout-categories)
    (define-category (car entry) (cdr entry))))
4ed46869
KH
96\f
97;;; Setting syntax and category.
98
99;; ASCII
100
;; Every ASCII code from 32 (space) through 126 (`~') gets both the
;; `a' (ASCII) and the `l' (Latin) category.  Codes below 32 and code
;; 127 are not touched.
(dotimes (offset (- 127 32))
  (let ((code (+ 32 offset)))
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)))
106
107;; Arabic character set
108
;; Put the generic character of each Arabic charset in category `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  ;; (modify-syntax-entry (make-char charset) "w")
  (modify-category-entry (make-char charset) ?b))
269a5dd0
DL
;; Category `b' for three inclusive UCS ranges, given as (FIRST . LAST):
;; #x600-#x6ff, #xfb50-#xfdff and #xfe70-#xfefe.
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (let ((code (car range))
        (last (cdr range)))
    (while (<= code last)
      (modify-category-entry (decode-char 'ucs code) ?b)
      (setq code (1+ code)))))
4ed46869
KH
129
130;; Chinese character set (GB2312)
131
269a5dd0 132;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
4ed46869
KH
;; Rows 33, 34 and 41 of GB2312 are symbols.  This must run before the
;; bracket entries below, which override individual characters of
;; row 33.
(dolist (row '(33 34 41))
  (modify-syntax-entry (make-char 'chinese-gb2312 row) "_"))

;; Bracket pairs, written as (ROW OPEN-COLUMN CLOSE-COLUMN) position
;; codes.  Each opener gets open-parenthesis syntax matching its
;; closer, and each closer gets close-parenthesis syntax matching its
;; opener.
(dolist (pair '((#x21 #x32 #x33)
                (#x21 #x34 #x35)
                (#x21 #x36 #x37)
                (#x21 #x38 #x39)
                (#x21 #x3a #x3b)
                (#x21 #x3c #x3d)
                (#x21 #x3e #x3f)
                (#x23 #x28 #x29)
                (#x23 #x7b #x7d)
                (#x23 #x5b #x5d)))
  (let ((open (make-char 'chinese-gb2312 (car pair) (nth 1 pair)))
        (close (make-char 'chinese-gb2312 (car pair) (nth 2 pair))))
    (modify-syntax-entry open (format "(%c" close))
    (modify-syntax-entry close (format ")%c" open))))
4ed46869 156
840f8f73
KH
;; Give punctuation syntax to every character of the string below.
(let ((marks "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B")
      (idx 0))
  (while (< idx (length marks))
    (modify-syntax-entry (aref marks idx) ".")
    (setq idx (1+ idx))))
160
4ed46869
KH
;; The whole charset is Chinese (`c') and allows line breaks (`|').
(let ((generic (make-char 'chinese-gb2312)))
  (modify-category-entry generic ?c)
  (modify-category-entry generic ?\|))

;; Per-row categories, as (ROW . CATEGORY).
(dolist (row-cat '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'chinese-gb2312 (car row-cat))
                         (cdr row-cat)))

;; Rows 48 through 126 are Han characters.
(dotimes (n (- 127 48))
  (modify-category-entry (make-char 'chinese-gb2312 (+ 48 n)) ?C))
172
173;; Chinese character set (BIG5)
174
840f8f73
KH
;; Each entry is (FIRST-BIG5-CODE END-BIG5-CODE SYNTAX).  Both codes
;; are decoded with `decode-big5-char' and every character from the
;; first up to, but not including, the end character gets SYNTAX.
(dolist (spec '((#xA141 #xA15D ".")
                (#xA1A5 #xA1AD ".")
                (#xA1AD #xA2AF "_")))
  (let ((ch (decode-big5-char (car spec)))
        (end (decode-big5-char (nth 1 spec)))
        (syntax (nth 2 spec)))
    (while (< ch end)
      (modify-syntax-entry ch syntax)
      (setq ch (1+ ch)))))
190
;; Row #x21 of chinese-big5-1 holds 19 bracket pairs in consecutive
;; columns #x3e .. #x63: each even-offset column is an opener and the
;; following column is its closer.
(let ((col #x3e))
  (while (< col #x63)
    (let ((open (make-char 'chinese-big5-1 #x21 col))
          (close (make-char 'chinese-big5-1 #x21 (1+ col))))
      (modify-syntax-entry open (format "(%c" close))
      (modify-syntax-entry close (format ")%c" open)))
    (setq col (+ col 2))))
198
4ed46869
KH
;; Both Big5 charsets are Chinese (`c'), Han (`C') and allow line
;; breaks (`|').
(dolist (generic (list (make-char 'chinese-big5-1)
                       (make-char 'chinese-big5-2)))
  ;; (modify-syntax-entry generic "w")
  (dolist (category '(?c ?C ?\|))
    (modify-category-entry generic category)))
212
213
214;; Chinese character set (CNS11643)
215
;; Each CNS11643 plane is Chinese (`c'), Han (`C') and allows line
;; breaks (`|').
(dolist (plane '(chinese-cns11643-1
                 chinese-cns11643-2
                 chinese-cns11643-3
                 chinese-cns11643-4
                 chinese-cns11643-5
                 chinese-cns11643-6
                 chinese-cns11643-7))
  (let ((generic (make-char plane)))
    ;; (modify-syntax-entry generic "w")
    (modify-category-entry generic ?c)
    (modify-category-entry generic ?C)
    (modify-category-entry generic ?|)))
231
232;; Cyrillic character set (ISO-8859-5)
233
;; The whole charset is Cyrillic (`y').
(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)

;; Code 160 is whitespace; codes 173 (#xad), 240 (#xf0) and 253 (#xfd)
;; are punctuation.
(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
(dolist (code '(173 240 253))
  (modify-syntax-entry (make-char 'cyrillic-iso8859-5 code) "."))
28636af6
RS
;; Register upper/lower case pairs in the standard case table.  The
;; pairs form four regular runs, so they are generated from codes
;; instead of being listed one by one:
;;
;;   ISO-8859-5  #xa1-#xaf (except #xad)  <->  code + #x50
;;   ISO-8859-5  #xb0-#xcf                <->  code + #x20
;;   UCS         #x401-#x40f (except #x40d) <-> code + #x50
;;   UCS         #x410-#x42f              <->  code + #x20
;;
;; NOTE(review): no pair is registered for UCS #x400/#x450 or
;; #x40d/#x45d, mirroring the gaps in the ISO-8859-5 runs; confirm
;; whether that is intended.
(let ((table (standard-case-table))
      code)
  ;; ISO-8859-5, first run.
  (setq code #xa1)
  (while (<= code #xaf)
    (unless (= code #xad)
      (set-case-syntax-pair (make-char 'cyrillic-iso8859-5 code)
                            (make-char 'cyrillic-iso8859-5 (+ code #x50))
                            table))
    (setq code (1+ code)))
  ;; ISO-8859-5, second run.
  (setq code #xb0)
  (while (<= code #xcf)
    (set-case-syntax-pair (make-char 'cyrillic-iso8859-5 code)
                          (make-char 'cyrillic-iso8859-5 (+ code #x20))
                          table)
    (setq code (1+ code)))
  ;; UCS, first run.
  (setq code #x401)
  (while (<= code #x40f)
    (unless (= code #x40d)
      (set-case-syntax-pair (decode-char 'ucs code)
                            (decode-char 'ucs (+ code #x50))
                            table))
    (setq code (1+ code)))
  ;; UCS, second run.
  (setq code #x410)
  (while (<= code #x42f)
    (set-case-syntax-pair (decode-char 'ucs code)
                          (decode-char 'ucs (+ code #x20))
                          table)
    (setq code (1+ code))))
4ed46869 333
9395eb7c
KH
334;; Devanagari character set
335
269a5dd0
DL
336;;; Commented out since the categories appear not to be used anywhere
337;;; and word syntax is the default.
338;; (let ((deflist '(;; chars syntax category
339;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
340;; ; chandrabindu, anuswar, visarga
341;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
342;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
343;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
344;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
345;; ;; Unicode equivalents
346;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
347;; ; chandrabindu, anuswar, visarga
348;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
349;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
350;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
351;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
352;; ))
353;; elm chars len syntax category to ch i)
354;; (while deflist
355;; (setq elm (car deflist))
356;; (setq chars (car elm)
357;; len (length chars)
358;; syntax (nth 1 elm)
359;; category (nth 2 elm)
360;; i 0)
361;; (while (< i len)
362;; (if (= (aref chars i) ?-)
363;; (setq i (1+ i)
364;; to (aref chars i))
365;; (setq ch (aref chars i)
366;; to ch))
367;; (while (<= ch to)
368;; (modify-syntax-entry ch syntax)
369;; (modify-category-entry ch category)
370;; (setq ch (1+ ch)))
371;; (setq i (1+ i)))
372;; (setq deflist (cdr deflist))))
9395eb7c 373
4ed46869
KH
374;; Ethiopic character set
375
;; The Ethiopic charset and UCS #x1200 through #x137c are in
;; category `e'.
(modify-category-entry (make-char 'ethiopic) ?e)
;; (modify-syntax-entry (make-char 'ethiopic) "w")
(let ((code #x1200))
  (while (<= code #x137c)
    (modify-category-entry (decode-char 'ucs code) ?e)
    (setq code (1+ code))))

;; The characters listed below get punctuation syntax.
(dolist (mark '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
                ;; Unicode equivalents of the above:
                ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B))
  (modify-syntax-entry mark "."))
4ed46869 386
4ed46869
KH
387;; Greek character set (ISO-8859-7)
388
;; The ISO-8859-7 charset and UCS #x370 through #x3ff are in
;; category `g'.
(modify-category-entry (make-char 'greek-iso8859-7) ?g)
(dotimes (offset (1+ (- #x3ff #x370)))
  (modify-category-entry (decode-char 'ucs (+ #x370 offset)) ?g))
269a5dd0
DL
394
395;; (let ((c 182))
396;; (while (< c 255)
397;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
398;; (setq c (1+ c))))
399;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
4ed46869
KH
;; ISO-8859-7 codes 183 (#xb7), 187 (#xbb) and 189 (#xbd) are
;; punctuation.
(dolist (code '(183 187 189))
  (modify-syntax-entry (make-char 'greek-iso8859-7 code) "."))
;; Syntax of the non-letters and the upper/lower case pairs for Greek,
;; registered in the standard case table.
;;
;; The original listed ISO-8859-7 position #xa6 twice in a row, first
;; as "." and then as "_".  Only the second setting ever took effect,
;; so the contradictory first one has been dropped; the resulting
;; tables are unchanged.
(let ((table (standard-case-table))
      code)
  ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
  ;; in several cases.
  ;; Entries are (ISO-8859-7-CODE . SYNTAX).
  (dolist (entry '((#xa1 . ".")
                   (#xa2 . ".")
                   (#xa6 . "_")
                   (#xa7 . ".")
                   (#xa9 . "_")
                   (#xab . ".")
                   (#xac . "_")
                   (#xad . "_")
                   (#xaf . ".")
                   (#xb0 . "_")
                   (#xb1 . "_")))
    (set-case-syntax (make-char 'greek-iso8859-7 (car entry))
                     (cdr entry) table))
  ;; (set-case-syntax (make-char 'greek-iso8859-7 #xb7) "_" table)
  ;; (set-case-syntax (make-char 'greek-iso8859-7 #xbd) "_" table)

  ;; ISO-8859-7 letters: #xc1-#xdb pair with code + #x20, except #xd2,
  ;; for which no pair is registered.
  (setq code #xc1)
  (while (<= code #xdb)
    (unless (= code #xd2)
      (set-case-syntax-pair (make-char 'greek-iso8859-7 code)
                            (make-char 'greek-iso8859-7 (+ code #x20))
                            table))
    (setq code (1+ code)))
  ;; ISO-8859-7 pairs outside that run, as (UPPER . LOWER).
  (dolist (pair '((#xbf . #xfe)
                  (#xbe . #xfd)
                  (#xbc . #xfc)
                  (#xb6 . #xdc)
                  (#xb8 . #xdd)
                  (#xb9 . #xde)
                  (#xba . #xdf)))
    (set-case-syntax-pair (make-char 'greek-iso8859-7 (car pair))
                          (make-char 'greek-iso8859-7 (cdr pair))
                          table))

  ;; Unicode equivalents: #x391-#x3ab pair with code + #x20, except
  ;; #x3a2, for which no pair is registered.
  (setq code #x391)
  (while (<= code #x3ab)
    (unless (= code #x3a2)
      (set-case-syntax-pair (decode-char 'ucs code)
                            (decode-char 'ucs (+ code #x20))
                            table))
    (setq code (1+ code)))
  ;; Unicode pairs outside that run, as (UPPER . LOWER).
  (dolist (pair '((#x38f . #x3ce)
                  (#x38e . #x3cd)
                  (#x38c . #x3cc)
                  (#x386 . #x3ac)
                  (#x388 . #x3ad)
                  (#x389 . #x3ae)
                  (#x38a . #x3af)))
    (set-case-syntax-pair (decode-char 'ucs (car pair))
                          (decode-char 'ucs (cdr pair))
                          table)))
4ed46869
KH
487
488;; Hebrew character set (ISO-8859-8)
489
227f528e
EZ
;; The ISO-8859-8 charset and UCS #x591 through #x5f4 are in
;; category `w'.
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
(dotimes (offset (1+ (- #x5f4 #x591)))
  (modify-category-entry (decode-char 'ucs (+ #x591 offset)) ?w))

;; Punctuation in the ISO-8859-8 charset: 208 PASEQ, 211 SOF PASUQ.
(dolist (code '(208 211))
  (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "."))

;; Punctuation by UCS code:
;; #x5be MAQAF, #x5c0 PASEQ, #x5c3 SOF PASUQ, #x5f3 GERESH,
;; #x5f4 GERSHAYIM.
(dolist (ucs '(#x5be #x5c0 #x5c3 #x5f3 #x5f4))
  (modify-syntax-entry (decode-char 'ucs ucs) "."))
4ed46869 503
269a5dd0
DL
504;; (let ((c 224))
505;; (while (< c 251)
506;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
507;; (setq c (1+ c))))
508;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
4ed46869
KH
509
510;; Indian character set (IS 13194 and other Emacs original Indian charsets)
511
;; Categories of the generic characters, as (CHARSET . CATEGORY).
(dolist (entry '((indian-is13194 . ?i)
                 (indian-2-column . ?I)
                 (indian-glyph . ?I)))
  (modify-category-entry (make-char (car entry)) (cdr entry)))
;; Unicode Devanagari block
(dotimes (offset (1+ (- #x970 #x901)))
  (modify-category-entry (decode-char 'ucs (+ #x901 offset)) ?i))
4ed46869 520
ed459fb6
KH
;; Apply one table of phonetic categories to nine UCS blocks.  Each
;; table row is (FIRST-OFFSET LAST-OFFSET CATEGORY), with offsets
;; relative to a block base and both ends inclusive.
(let ((offset-ranges '(;; RANGE CATEGORY MEANINGS
                       (#x01 #x03 ?7)   ; vowel modifier
                       (#x05 #x14 ?1)   ; base vowel
                       (#x15 #x39 ?0)   ; consonants
                       (#x3e #x4d ?8)   ; vowel modifier
                       (#x51 #x54 ?4)   ; stress/tone mark
                       (#x58 #x5f ?0)   ; consonants
                       (#x60 #x61 ?1)   ; base vowel
                       (#x62 #x63 ?8)   ; vowel modifier
                       (#x66 #x6f ?6)   ; digits
                       ))
      (block-bases '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00)))
  (dolist (base block-bases)
    (dolist (range offset-ranges)
      (let ((code (+ base (car range)))
            (last (+ base (nth 1 range)))
            (category (nth 2 range)))
        (while (<= code last)
          (modify-category-entry (decode-char 'ucs code) category)
          (setq code (1+ code)))))))
4a027a0d 540
4ed46869
KH
541;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
542
;; Categories of the generic character of each Japanese charset, as
;; (CHARSET CATEGORY...).
(dolist (entry '((katakana-jisx0201 ?k ?j ?\|)
                 (latin-jisx0201 ?r)
                 (japanese-jisx0208 ?j ?\|)
                 (japanese-jisx0212 ?j ?\|)))
  (let ((generic (make-char (car entry))))
    (dolist (category (cdr entry))
      (modify-category-entry generic category))))
4ed46869 551
269a5dd0
DL
552;; Unicode equivalents of JISX0201-kana
;; Categories for three inclusive UCS ranges, as
;; (FIRST LAST CATEGORY...):
;;
;;   #xff61-#xff9f  Unicode equivalents of JISX0201-kana
;;   #x30a0-#x30ff  Katakana block (?K is double width, ?k isn't
;;                  specified)
;;   #x3040-#x309f  Hiragana block (?H is actually defined to be
;;                  double width; category ?j is deliberately not
;;                  added here)
(dolist (span '((#xff61 #xff9f ?k ?j ?\|)
                (#x30a0 #x30ff ?k ?j ?\|)
                (#x3040 #x309f ?H ?\|)))
  (let ((code (car span))
        (last (nth 1 span))
        (categories (nthcdr 2 span)))
    (while (<= code last)
      (let ((ch (decode-char 'ucs code)))
        (dolist (category categories)
          (modify-category-entry ch category)))
      (setq code (1+ code)))))
577
4ed46869 578;; JISX0208
269a5dd0 579;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
4ed46869
KH
;; Rows 33, 34 and 40 of JISX0208 are symbols.
(dolist (row '(33 34 40))
  (modify-syntax-entry (make-char 'japanese-jisx0208 row) "_"))

;; Twelve characters of row #x21 are word constituents nevertheless;
;; they are given here by their column position code.
(dolist (col '(#x3c #x2b #x2c
               #x33 #x34 #x35 #x36 #x37 #x38 #x39 #x3a #x3b))
  (modify-syntax-entry (make-char 'japanese-jisx0208 #x21 col) "w"))
4ed46869
KH
;; Bracket pairs of JISX0208 row #x21, as (OPEN-COLUMN . CLOSE-COLUMN).
;; Openers get open-parenthesis syntax and closers close-parenthesis
;; syntax, each naming the other as its match.
(dolist (pair '((#x4a . #x4b)
                (#x4e . #x4f)
                (#x50 . #x51)
                (#x56 . #x57)
                (#x58 . #x59)))
  (let ((open (make-char 'japanese-jisx0208 #x21 (car pair)))
        (close (make-char 'japanese-jisx0208 #x21 (cdr pair))))
    (modify-syntax-entry open (format "(%c" close))
    (modify-syntax-entry close (format ")%c" open))))
597
;; Per-row categories, as (ROW . CATEGORY).
(dolist (row-cat '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y)))
  (modify-category-entry (make-char 'japanese-jisx0208 (car row-cat))
                         (cdr row-cat)))
;; Rows 48 through 126 are Han characters.
(dotimes (n (- 127 48))
  (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 n)) ?C))

;; Individual characters of row #x21, by column position code:
;; column #x3c is katakana; columns #x2b and #x2c are both katakana
;; and hiragana; columns #x33 through #x3b are Han.
(modify-category-entry (make-char 'japanese-jisx0208 #x21 #x3c) ?K)
(dolist (col '(#x2b #x2c))
  (let ((ch (make-char 'japanese-jisx0208 #x21 col)))
    (modify-category-entry ch ?K)
    (modify-category-entry ch ?H)))
(let ((col #x33))
  (while (<= col #x3b)
    (modify-category-entry (make-char 'japanese-jisx0208 #x21 col) ?C)
    (setq col (1+ col))))
617
618;; JISX0212
269a5dd0 619;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
4ed46869
KH
;; Rows 33, 34 and 35 of JISX0212 are symbols; the charset as a whole
;; is in the Han category `C'.
(dolist (row '(33 34 35))
  (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_"))

(modify-category-entry (make-char 'japanese-jisx0212) ?C)
625
626;; JISX0201-Kana
269a5dd0
DL
627;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
;; Punctuation: JISX0201-kana position codes #x21, #x24 and #x25 ...
(dolist (code '(#x21 #x24 #x25))
  (modify-syntax-entry (make-char 'katakana-jisx0201 code) "."))
;; ... and their Unicode counterparts.
(dolist (ucs '(#xff61 #xff64 #xff65))
  (modify-syntax-entry (decode-char 'ucs ucs) "."))
634
226e4119
KH
;; The JISX0201-kana corner brackets (position codes #x22 and #x23)
;; form a parenthesis pair.  The opener gets open-parenthesis syntax
;; matching the closer, and the closer gets close-parenthesis syntax
;; matching the opener.
;;
;; The original gave the closing bracket the descriptor "(" as well,
;; so both brackets behaved as openers and the pair never balanced.
(let ((open (make-char 'katakana-jisx0201 #x22))
      (close (make-char 'katakana-jisx0201 #x23)))
  (modify-syntax-entry open (format "(%c" close))
  (modify-syntax-entry close (format ")%c" open)))
637
4ed46869
KH
638;; Korean character set (KSC5601)
639
269a5dd0 640;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
4ed46869
KH
;; Rows 33, 34 and 38 through 41 of KSC5601 are symbols.
(dolist (row '(33 34 38 39 40 41))
  (modify-syntax-entry (make-char 'korean-ksc5601 row) "_"))

;; The whole charset is Korean (`h'); some rows get an additional
;; category, listed as (ROW . CATEGORY).
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(dolist (row-cat '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y)))
  (modify-category-entry (make-char 'korean-ksc5601 (car row-cat))
                         (cdr row-cat)))
654
d05cfa1f
KH
655;; Latin character set (latin-1,2,3,4,5,8,9)
656
;; Each Latin charset is in category `l', and its code 160 also
;; counts as a space for indentation (the space category).  The
;; generic character is handled before code 160 within each charset.
(dolist (charset '(latin-iso8859-1
                   latin-iso8859-2
                   latin-iso8859-3
                   latin-iso8859-4
                   latin-iso8859-9
                   latin-iso8859-14
                   latin-iso8859-15))
  (modify-category-entry (make-char charset) ?l)
  (modify-category-entry (make-char charset 160) ?\s))
672
6eba8645
KH
673;; Lao character set
674
;; The Lao charset and UCS #xe80 through #xeff are in category `o'.
(modify-category-entry (make-char 'lao) ?o)
(let ((code #xe80))
  (while (<= code #xeff)
    (modify-category-entry (decode-char 'ucs code) ?o)
    (setq code (1+ code))))
6eba8645
KH
678
;; Assign a phonetic category (and, unless it is "w", a syntax class)
;; to the Lao characters.  Each spec is (CHARS SYNTAX CATEGORY).
;; CHARS is scanned left to right: an ordinary character is processed
;; by itself, and "-X" extends processing from the character after
;; the previously processed one up to and including X.
(let ((specs '(;; chars	syntax	category
               ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
               ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
               ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
               ("\e(1XY\e(B" "w" ?3) ; vowel lower
               ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
               ("\e(1\\e(B" "w" ?9) ; semivowel lower
               ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
               ("\e(1Of\e(B" "_" ?5) ; symbol
               ;; Unicode equivalents
               ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
               ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
               ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
               ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
               ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark
               ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower
               ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
               ("\e$,1DODf\e(B" "_" ?5) ; symbol
               ))
      ;; CURSOR is the next character to process and LIMIT the last
      ;; one of the current run.  CURSOR deliberately survives from
      ;; one scan step to the next: after a run it points one past the
      ;; last processed character, which is where a following "-X"
      ;; range resumes.
      cursor limit)
  (dolist (spec specs)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (pos 0))
      (while (< pos len)
        (cond ((= (aref chars pos) ?-)
               ;; Range: keep CURSOR, take the end from the next char.
               (setq pos (1+ pos))
               (setq limit (aref chars pos)))
              (t
               ;; Single character: a run of length one.
               (setq cursor (aref chars pos)
                     limit cursor)))
        (while (<= cursor limit)
          (unless (string-equal syntax "w")
            (modify-syntax-entry cursor syntax))
          (modify-category-entry cursor category)
          (setq cursor (1+ cursor)))
        (setq pos (1+ pos))))))
719
4ed46869
KH
720;; Thai character set (TIS620)
721
;; Category ?t for the generic `thai-tis620' character and for the
;; characters decoded from UCS code points #xe00 through #xe7f.
(modify-category-entry (make-char 'thai-tis620) ?t)
(dotimes (offset (1+ (- #xe7f #xe00)))
  (modify-category-entry (decode-char 'ucs (+ #xe00 offset)) ?t))

;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS names single characters
;; and ranges written FROM-TO.  Every character named is put in
;; CATEGORY; it is also given SYNTAX unless SYNTAX is "w".
(let ((specs '(;; chars	syntax	category
               ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
               ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
               ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
               ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
               ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
               ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
               ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
               ;; Unicode equivalents
               ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
               ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
               ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
               ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
               ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
               ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
               ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
               ))
      cur limit)
  (dolist (spec specs)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0))
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; A plain character is a one-element range.
            (setq cur (aref chars idx)
                  limit cur)
          ;; A hyphen: the next character closes a range.  CUR already
          ;; stands one past the range's first character, which was
          ;; handled on the previous iteration.
          (setq idx (1+ idx)
                limit (aref chars idx)))
        (while (<= cur limit)
          (or (string-equal syntax "w")
              (modify-syntax-entry cur syntax))
          (modify-category-entry cur category)
          (setq cur (1+ cur)))
        (setq idx (1+ idx))))))
764
765;; Tibetan character set
766
16230888
KH
;; Category ?q for the generic `tibetan' and `tibetan-1-column'
;; characters and for the characters decoded from UCS code points
;; #xf00 through #xfff.
(modify-category-entry (make-char 'tibetan) ?q)
(modify-category-entry (make-char 'tibetan-1-column) ?q)
(dotimes (offset (1+ (- #xfff #xf00)))
  (modify-category-entry (decode-char 'ucs (+ #xf00 offset)) ?q))

;; Each spec is (CHARS SYNTAX CATEGORY).  CHARS names single characters
;; and ranges written FROM-TO.  Every character named is put in
;; CATEGORY; it is also given SYNTAX unless SYNTAX is "w".
(let ((specs '(;; chars	syntax	category
               ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
               ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
               ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
               ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
               ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
               ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
               ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
               ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
               ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
               ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
               ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
               ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
               ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
               ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others

               ;; Unicode version (not complete)
               ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
               ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
               ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
               ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
               ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
               ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
               ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
               ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
               ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
               ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
               ))
      cur limit)
  (dolist (spec specs)
    (let* ((chars (car spec))
           (syntax (nth 1 spec))
           (category (nth 2 spec))
           (len (length chars))
           (idx 0))
      (while (< idx len)
        (if (/= (aref chars idx) ?-)
            ;; A plain character is a one-element range.
            (setq cur (aref chars idx)
                  limit cur)
          ;; A hyphen: the next character closes a range.  CUR already
          ;; stands one past the range's first character, which was
          ;; handled on the previous iteration.
          (setq idx (1+ idx)
                limit (aref chars idx)))
        (while (<= cur limit)
          (or (string-equal syntax "w")
              (modify-syntax-entry cur syntax))
          (modify-category-entry cur category)
          (setq cur (1+ cur)))
        (setq idx (1+ idx))))))
821
822;; Vietnamese character set
823
;; Both VISCII charsets get category ?v, and also ?l so that they can
;; make a word together with Latin characters.
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (let ((generic (make-char charset)))
    ;; (modify-syntax-entry generic "w")
    (modify-category-entry generic ?v)
    (modify-category-entry generic ?l)))
833
e5dd1155
KH
;; In the standard case table, pair the `vietnamese-viscii-upper' and
;; `vietnamese-viscii-lower' characters that share a position code,
;; for every code from 32 through 127.
(let ((table (standard-case-table)))
  (dotimes (n (- 128 32))
    (let ((code (+ 32 n)))
      (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                            (make-char 'vietnamese-viscii-lower code)
                            table))))
841
d05cfa1f
KH
842;; Unicode (mule-unicode-0100-24ff)
843
85ef8ece
KH
844(let ((tbl (standard-case-table)) c)
845
846;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
847;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
848;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
849;; Thus we have to check language-environment to handle casing
850;; correctly. Currently only I<->i is available.
851
85ef8ece
KH
;; Latin Extended-A, Latin Extended-B
  ;; Every code point in #x0100..#x0233 gets category ?l.  Only even
  ;; code points start a case pair: in #x0100..#x012e and
  ;; #x014a..#x0177 the even code is the upper-case member and the
  ;; next code its partner; in #x013a..#x0148 the even code is the
  ;; lower-case member and the previous code its partner.
  (setq c #x0100)
  (while (<= c #x0233)
    (modify-category-entry (decode-char 'ucs c) ?l)
    (when (zerop (% c 2))
      (if (or (<= c #x012e)
              (and (>= c #x014a) (<= c #x0177)))
          (set-case-syntax-pair
           (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
      (if (and (>= c #x013a) (<= c #x0148))
          (set-case-syntax-pair
           (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl)))
    (setq c (1+ c)))
85ef8ece
KH
867 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
868 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
869 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
8325c01e 870;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
d05cfa1f
KH
871 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
872 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
873 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
874
269a5dd0
DL
875 ;; Latin Extended-B
876 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
877 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
878 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
879 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
880 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
881 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
882 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
883 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
884 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
885 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
886 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
887 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
888 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
889 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
892 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
893 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
894 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
900 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
912 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
913 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
914 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
915 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
916 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
917 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
918 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
919 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
920 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
921 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
922 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
923 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
924 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
925 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
926 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
927 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
928 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
929 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
930 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
935 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
936 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
943 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
944 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
953 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
954 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
961 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
962 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
963 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
964 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
965 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
966 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
967 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
968 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
969 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
970
85ef8ece
KH
;; Latin Extended Additional
  ;; Every code point in #x1e00..#x1ef9 gets category ?l.  Each even
  ;; code point outside the gap #x1e95..#x1e9f is paired with the code
  ;; point that follows it.
  (setq c #x1e00)
  (while (<= c #x1ef9)
    (modify-category-entry (decode-char 'ucs c) ?l)
    (unless (or (= (% c 2) 1)
                (and (> c #x1e94) (< c #x1ea0)))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
    (setq c (1+ c)))
980
85ef8ece
KH
;; Greek
  ;; Every code point in #x0370..#x03ff gets category ?g.
  (setq c #x0370)
  (while (<= c #x03ff)
    (modify-category-entry (decode-char 'ucs c) ?g)
    ;; #x0391..#x03ab, skipping #x03a2, pair with the code point 32
    ;; above.
    (when (and (>= c #x0391) (<= c #x03ab) (/= c #x03a2))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
    ;; Even code points in #x03da..#x03ee pair with their successor.
    (when (and (zerop (% c 2)) (>= c #x03da) (<= c #x03ee))
      (set-case-syntax-pair
       (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
    (setq c (1+ c)))
995 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
996 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
997 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
998 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
999 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1000 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1001 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1002
269a5dd0
DL
;; Armenian
  ;; Each code point in #x531..#x556 is paired with the code point
  ;; #x30 above it.
  (setq c #x531)
  (while (< c #x557)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ #x30 c))))
      (set-case-syntax-pair upper lower tbl))
    (setq c (1+ c)))
1009
85ef8ece
KH
;; Greek Extended
  ;; Every code point in #x1f00..#x1fff gets category ?g.  Up to
  ;; #x1fa7, a code point whose low nibble is 0..7 is registered as the
  ;; lower-case partner of the code point 8 above it.  Excluded are
  ;; #x1f50, #x1f52, #x1f54, #x1f56 and the whole row #x1f70..#x1f77,
  ;; which the explicit pairs after this loop cover.
  (setq c #x1f00)
  (while (<= c #x1fff)
    (modify-category-entry (decode-char 'ucs c) ?g)
    (and (<= (logand c #x000f) 7)
         (<= c #x1fa7)
         (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
         ;; Skip the #x1f7x row.  The mask yields a multiple of 16, so
         ;; it has to be compared with #x70; a comparison with 7 is
         ;; always true and excludes nothing.
         (/= (logand c #x00f0) #x70)
         (set-case-syntax-pair
          (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
    (setq c (1+ c)))
1021 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1022 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1023 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1024 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1025 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1026 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1027 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1028 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1029 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1030 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1031 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1032 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1033 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1034 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1035 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1036 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1037 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1038 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1039 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1045
85ef8ece
KH
;; cyrillic
  ;; Every code point in #x0400..#x04ff gets category ?y.  The three
  ;; pairing rules below apply to disjoint ranges, so at most one of
  ;; them fires for a given code point.
  (setq c #x0400)
  (while (<= c #x04ff)
    (let ((this (decode-char 'ucs c)))
      (modify-category-entry this ?y)
      (cond
       ;; #x0400..#x040f pair with the code point 80 above.
       ((<= c #x040f)
        (set-case-syntax-pair this (decode-char 'ucs (+ c 80)) tbl))
       ;; #x0410..#x042f pair with the code point 32 above.
       ((<= c #x042f)
        (set-case-syntax-pair this (decode-char 'ucs (+ c 32)) tbl))
       ;; Even code points in these ranges pair with their successor.
       ((and (zerop (% c 2))
             (or (and (>= c #x0460) (<= c #x0480))
                 (and (>= c #x048c) (<= c #x04be))
                 (and (>= c #x04d0) (<= c #x04f4))))
        (set-case-syntax-pair this (decode-char 'ucs (1+ c)) tbl))))
    (setq c (1+ c)))
1065 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1066 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1067 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1068 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1069 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1070
85ef8ece
KH
;; general punctuation
  ;; Each entry is (FIRST LAST SYNTAX).  #x2017..#x201f get punctuation
  ;; syntax for quotation marks (like `).
  (dolist (range '((#x2000 #x200b " ")
                   (#x2010 #x2016 "_")
                   (#x2017 #x201f ".")
                   (#x2020 #x2027 "_")))
    (setq c (car range))
    (while (<= c (nth 1 range))
      (set-case-syntax (decode-char 'ucs c) (nth 2 range) tbl)
      (setq c (1+ c))))
d05cfa1f 1087
269a5dd0
DL
;; Roman numerals
  ;; #x2160..#x216f pair with the code point #x10 above.
  (setq c #x2160)
  (while (< c #x2170)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ #x10 c))))
      (set-case-syntax-pair upper lower tbl))
    (setq c (1+ c)))

  ;; Circled Latin
  ;; #x24b6..#x24cf pair with the code point 26 above; both members
  ;; get category ?l.
  (setq c #x24b6)
  (while (< c #x24d0)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ 26 c))))
      (set-case-syntax-pair upper lower tbl)
      (modify-category-entry upper ?l)
      (modify-category-entry lower ?l))
    (setq c (1+ c)))

  ;; Fullwidth Latin
  ;; #xff21..#xff3a pair with the code point #x20 above; both members
  ;; get category ?l.
  (setq c #xff21)
  (while (< c #xff3b)
    (let ((upper (decode-char 'ucs c))
          (lower (decode-char 'ucs (+ #x20 c))))
      (set-case-syntax-pair upper lower tbl)
      (modify-category-entry upper ?l)
      (modify-category-entry lower ?l))
    (setq c (1+ c)))
1112
269a5dd0
DL
;; Combining diacritics (#x300..#x362) and combining marks
  ;; (#x20d0..#x20e3): every code point in both ranges gets
  ;; category ?^.
  (dolist (range '((#x300 . #x362)
                   (#x20d0 . #x20e3)))
    (setq c (car range))
    (while (<= c (cdr range))
      (modify-category-entry (decode-char 'ucs c) ?^)
      (setq c (1+ c))))
1124
1125 ;; Fixme: syntax for symbols &c
1126 )
d6af0bff
KH
1127
;; Each string holds an opening and a closing delimiter.  The first
;; character gets open-parenthesis syntax matching the second, and the
;; second gets close-parenthesis syntax matching the first.
(let ((pairs
       '("\e$,1sEsF\e(B"			; U+2045 U+2046
         "\e$,1s}s~\e(B"			; U+207D U+207E
         "\e$,1t-t.\e(B"			; U+208D U+208E
         "\e$,1{){*\e(B"			; U+2329 U+232A
         "\e$,1|T|U\e(B"			; U+23B4 U+23B5
         "\e$,2&H&I\e(B"			; U+2768 U+2769
         "\e$,2&J&K\e(B"			; U+276A U+276B
         "\e$,2&L&M\e(B"			; U+276C U+276D
         "\e$,2&P&Q\e(B"			; U+2770 U+2771
         "\e$,2&R&S\e(B"			; U+2772 U+2773
         "\e$,2&T&U\e(B"			; U+2774 U+2775
         "\e$,2'f'g\e(B"			; U+27E6 U+27E7
         "\e$,2'h'i\e(B"			; U+27E8 U+27E9
         "\e$,2'j'k\e(B"			; U+27EA U+27EB
         "\e$,2,#,$\e(B"			; U+2983 U+2984
         "\e$,2,%,&\e(B"			; U+2985 U+2986
         "\e$,2,',(\e(B"			; U+2987 U+2988
         "\e$,2,),*\e(B"			; U+2989 U+298A
         "\e$,2,+,,\e(B"			; U+298B U+298C
         "\e$,2,-,.\e(B"			; U+298D U+298E
         "\e$,2,/,0\e(B"			; U+298F U+2990
         "\e$,2,1,2\e(B"			; U+2991 U+2992
         "\e$,2,3,4\e(B"			; U+2993 U+2994
         "\e$,2,5,6\e(B"			; U+2995 U+2996
         "\e$,2,7,8\e(B"			; U+2997 U+2998
         "\e$,2-<-=\e(B"			; U+29FC U+29FD
         "\e$,2=H=I\e(B"			; U+3008 U+3009
         "\e$,2=J=K\e(B"			; U+300A U+300B
         "\e$,2=L=M\e(B"			; U+300C U+300D
         "\e$,2=N=O\e(B"			; U+300E U+300F
         "\e$,2=P=Q\e(B"			; U+3010 U+3011
         "\e$,2=T=U\e(B"			; U+3014 U+3015
         "\e$,2=V=W\e(B"			; U+3016 U+3017
         "\e$,2=X=Y\e(B"			; U+3018 U+3019
         "\e$,2=Z=[\e(B"			; U+301A U+301B
         "\e$,3m~m\7f\e(B"			; U+FD3E U+FD3F
         "\e$,3pUpV\e(B"			; U+FE35 U+FE36
         "\e$,3pWpX\e(B"			; U+FE37 U+FE38
         "\e$,3pYpZ\e(B"			; U+FE39 U+FE3A
         "\e$,3p[p\\e(B"			; U+FE3B U+FE3C
         "\e$,3p]p^\e(B"			; U+FE3D U+FE3E
         "\e$,3p_p`\e(B"			; U+FE3F U+FE40
         "\e$,3papb\e(B"			; U+FE41 U+FE42
         "\e$,3pcpd\e(B"			; U+FE43 U+FE44
         "\e$,3pypz\e(B"			; U+FE59 U+FE5A
         "\e$,3p{p|\e(B"			; U+FE5B U+FE5C
         "\e$,3p}p~\e(B"			; U+FE5D U+FE5E
         "\e$,3rhri\e(B"			; U+FF08 U+FF09
         "\e$,3s;s=\e(B"			; U+FF3B U+FF3D
         "\e$,3s[s]\e(B"			; U+FF5B U+FF5D
         "\e$,3s_s`\e(B"			; U+FF5F U+FF60
         "\e$,3sbsc\e(B"			; U+FF62 U+FF63
         )))
  (dolist (pair pairs)
    (let ((opener (aref pair 0))
          (closer (aref pair 1)))
      (modify-syntax-entry opener (string ?\( closer))
      (modify-syntax-entry closer (string ?\) opener)))))
1185
4ed46869
KH
1186\f
1187;;; Setting word boundary.
1188
;; WORD-COMBINING-CATEGORIES holds the single pair (?l . ?l);
;; WORD-SEPARATING-CATEGORIES lists category pairs for the 2-byte
;; character sets.
(setq word-combining-categories '((?l . ?l))
      word-separating-categories
      '((?A . ?K)			; Alpha numeric - Katakana
        (?A . ?C)			; Alpha numeric - Chinese
        (?H . ?A)			; Hiragana - Alpha numeric
        (?H . ?K)			; Hiragana - Katakana
        (?H . ?C)			; Hiragana - Chinese
        (?K . ?A)			; Katakana - Alpha numeric
        (?K . ?C)			; Katakana - Chinese
        (?C . ?A)			; Chinese - Alpha numeric
        (?C . ?K)			; Chinese - Katakana
        ))
777cfce6
KH
1203
1204\f
;; For each character set, record the coding system best suited to
;; encoding it in the charset's `preferred-coding-system' property.
777cfce6
KH
1207
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (ascii-right-to-left . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  ;; ENTRY is (CHARSET . CODING-SYSTEM).
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
df0415c5
KH
1254
1255\f
98a663f1 1256;; Setup auto-fill-chars for charsets that should invoke auto-filling.
269a5dd0
DL
1257;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1258;; property on the charsets.
df0415c5
KH
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  ;; Mark the charset's generic character in `auto-fill-chars' and
  ;; flag the charset itself with `nospace-between-words'.
  (aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
777cfce6
KH
1266
1267;;; Local Variables:
1268;;; coding: iso-2022-7bit
1269;;; End:
1270
ab5796a9 1271;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
60370d40 1272;;; characters.el ends here