Undo prev change.
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
28636af6 3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
4ed46869
KH
5
6;; Keywords: multibyte character, character set, syntax, category
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869
KH
24
25;;; Commentary:
26
27;; This file contains multibyte characters. Save this file always in
4b16fa0c 28;; the coding system `iso-2022-7bit'.
4ed46869 29
49adf443
RS
30;; This file does not define the syntax for Latin-N character sets;
31;; those are defined by the files latin-N.el.
32
4ed46869
KH
33;;; Predefined categories.
34
35;; For each character set.
36
;; One category per script / character set.  Each entry is
;; (MNEMONIC . DOCSTRING); categories are defined in list order.
(let ((charset-categories
       '((?a . "ASCII")
         (?l . "Latin")
         (?t . "Thai")
         (?g . "Greek")
         (?b . "Arabic")
         (?w . "Hebrew")
         (?y . "Cyrillic")
         (?k . "Japanese katakana")
         (?r . "Japanese roman")
         (?c . "Chinese")
         (?j . "Japanese")
         (?h . "Korean")
         (?e . "Ethiopic (Ge'ez)")
         (?v . "Vietnamese")
         (?i . "Indian")
         (?o . "Lao")
         (?q . "Tibetan"))))
  (while charset-categories
    (define-category (car (car charset-categories))
      (cdr (car charset-categories)))
    (setq charset-categories (cdr charset-categories))))
4ed46869
KH
54
55;; For each group (row) of 2-byte character sets.
56
;; Categories naming the groups (rows) found inside 2-byte character
;; sets, plus the Indian glyph category.
(mapcar (function
         (lambda (group)
           (define-category (car group) (cdr group))))
        '((?A . "Alpha-numeric characters of 2-byte character sets")
          (?C . "Chinese (Han) characters of 2-byte character sets")
          (?G . "Greek characters of 2-byte character sets")
          (?H . "Japanese Hiragana characters of 2-byte character sets")
          (?K . "Japanese Katakana characters of 2-byte character sets")
          (?N . "Korean Hangul characters of 2-byte character sets")
          (?Y . "Cyrillic characters of 2-byte character sets")
          (?I . "Indian Glyphs")))
65
66;; For phonetic classifications.
67
;; Phonetic classes, using the digits `0'..`9' as mnemonics.
(mapcar (function
         (lambda (class)
           (define-category (car class) (cdr class))))
        '((?0 . "consonant")
          (?1 . "base (independent) vowel")
          (?2 . "upper diacritical mark (including upper vowel)")
          (?3 . "lower diacritical mark (including lower vowel)")
          (?4 . "tone mark")
          (?5 . "symbol")
          (?6 . "digit")
          (?7 . "vowel-modifying diacritical mark")
          (?8 . "vowel-signs")
          (?9 . "semivowel lower")))
4ed46869
KH
78
79;; For filling.
;; Category `|': characters at which a line may be broken while filling.
80(define-category ?| "While filling, we can break a line at this character.")
81
504af7b2 82;; For indentation calculation.
777cfce6
KH
;; Category ` ' (space): characters that count as whitespace when
;; computing indentation.  The mnemonic is spelled `?\ ' (as in the
;; Latin NBSP entries further down) and kept on the same line as the
;; docstring, so it cannot degrade into `?' followed by a newline --
;; which would read as the newline character -- if trailing
;; whitespace is ever stripped from this file.
(define-category ?\  "This character counts as a space for indentation purposes.")
504af7b2 85
94487c4e 86;; Keep the following for `kinsoku' processing. See comments in
4ed46869
KH
87;; kinsoku.el.
;; `>' tags characters that must not start a line; `<' tags characters
;; that must not end one.
88(define-category ?> "A character which can't be placed at beginning of line.")
89(define-category ?< "A character which can't be placed at end of line.")
90
91\f
92;;; Setting syntax and category.
93
94;; ASCII
95
;; Give every character code from 32 through 126 both the `a' (ASCII)
;; and the `l' (Latin) category.
(let ((code 32)
      (limit 127))
  (while (< code limit)
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)
    (setq code (1+ code))))
101
102;; Arabic character set
103
;; Every Arabic charset: word syntax and category `b'.
(let ((arabic-charsets '(arabic-iso8859-6
                         arabic-digit
                         arabic-1-column
                         arabic-2-column))
      generic)
  (while arabic-charsets
    (setq generic (make-char (car arabic-charsets)))
    (modify-syntax-entry generic "w")
    (modify-category-entry generic ?b)
    (setq arabic-charsets (cdr arabic-charsets))))
112
113;; Chinese character set (GB2312)
114
;; GB2312 syntax.  The whole charset defaults to word syntax ("w");
;; rows 33, 34 and 41 are then made symbol constituents ("_").  The
;; remaining entries pair up bracket characters: each opener gets
;; open-parenthesis syntax naming its closer, and each closer gets
;; close-parenthesis syntax naming its opener.
115(modify-syntax-entry (make-char 'chinese-gb2312) "w")
116(modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
117(modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
118(modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
119(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
120(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
121(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
122(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
123(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
124(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
125(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
126(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
127(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
128(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
129(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
130(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
131(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
132(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
133
;; The whole of GB2312 is Chinese (`c') and breakable when filling (`|').
(let ((gb-generic (make-char 'chinese-gb2312)))
  (modify-category-entry gb-generic ?c)
  (modify-category-entry gb-generic ?\|))

;; Rows 35..39 carry a script-specific category each.
(let ((row-categories '((35 . ?A) (36 . ?H) (37 . ?K) (38 . ?G) (39 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'chinese-gb2312 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))

;; Rows 48..126 get category `C'.
(let ((han-row 48))
  (while (<= han-row 126)
    (modify-category-entry (make-char 'chinese-gb2312 han-row) ?C)
    (setq han-row (1+ han-row))))
145
146;; Chinese character set (BIG5)
147
;; Both BIG5 charsets: word syntax, categories `c', `C' and `|'.
(let ((big5-charsets '(chinese-big5-1 chinese-big5-2))
      generic)
  (while big5-charsets
    (setq generic (make-char (car big5-charsets)))
    (modify-syntax-entry generic "w")
    (modify-category-entry generic ?c)
    (modify-category-entry generic ?C)
    (modify-category-entry generic ?\|)
    (setq big5-charsets (cdr big5-charsets))))
161
162
163;; Chinese character set (CNS11643)
164
;; Each CNS11643 plane: word syntax, categories `c', `C' and `|'.
(mapcar (function
         (lambda (plane)
           (let ((generic (make-char plane)))
             (modify-syntax-entry generic "w")
             (modify-category-entry generic ?c)
             (modify-category-entry generic ?C)
             (modify-category-entry generic ?|))))
        '(chinese-cns11643-1
          chinese-cns11643-2
          chinese-cns11643-3
          chinese-cns11643-4
          chinese-cns11643-5
          chinese-cns11643-6
          chinese-cns11643-7))
180
181;; Cyrillic character set (ISO-8859-5)
182
;; Every ISO-8859-5 character gets category `y'.  Code 160 is given
;; whitespace syntax (" ") and three individual characters are given
;; punctuation syntax (".").
183(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
184
28636af6 185(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
4ed46869
KH
186(modify-syntax-entry ?\e,L-\e(B ".")
187(modify-syntax-entry ?\e,Lp\e(B ".")
188(modify-syntax-entry ?\e,L}\e(B ".")
28636af6
RS
;; Register ISO-8859-5 letter pairs in the standard case table.  Each
;; `set-case-syntax-pair' call names two characters and the table TBL;
;; presumably the first is the uppercase and the second the lowercase
;; form, per that function's (UC LC TABLE) convention -- confirm
;; against case-table.el.
189(let ((tbl (standard-case-table)))
190 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
191 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
192 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
193 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
194 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
195 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
196 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
197 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
198 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
199 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
200 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
201 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
202 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
203 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
204 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
205 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
206 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
207 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
208 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
209 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
210 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
211 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
212 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
213 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
214 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
215 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
216 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
217 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
218 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
219 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
220 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
221 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
222 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
223 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
224 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
225 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
226 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
227 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
228 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
229 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
230 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
231 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
232 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
233 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
234 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
235 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl))
4ed46869 236
9395eb7c
KH
237;; Devanagari character set
238
;; Assign syntax and category to Devanagari characters.  Each DEFLIST
;; entry is (CHARS SYNTAX CATEGORY).  CHARS is walked one character at
;; a time: an ordinary character receives SYNTAX and CATEGORY itself,
;; while `-' extends the run up to and including the character after
;; it, so "X-Y" covers every code from X through Y.  The range form
;; relies on CH being left one past the last code handled by the
;; innermost loop.
239(let ((deflist '(;; chars syntax category
240 ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
241 ; chandrabindu, anuswar, visarga
9765a2ba 242 ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
9395eb7c
KH
243 ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
244 ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
245 ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
246 ))
247 elm chars len syntax category to ch i)
248 (while deflist
249 (setq elm (car deflist))
250 (setq chars (car elm)
251 len (length chars)
252 syntax (nth 1 elm)
253 category (nth 2 elm)
254 i 0)
255 (while (< i len)
256 (if (= (aref chars i) ?-)
257 (setq i (1+ i)
4a027a0d
KH
258 to (aref chars i))
259 (setq ch (aref chars i)
9395eb7c
KH
260 to ch))
261 (while (<= ch to)
262 (modify-syntax-entry ch syntax)
263 (modify-category-entry ch category)
264 (setq ch (1+ ch)))
4a027a0d 265 (setq i (1+ i)))
9395eb7c
KH
266 (setq deflist (cdr deflist))))
267
4ed46869
KH
268;; Ethiopic character set
269
;; All Ethiopic characters get category `e' and word syntax ("w");
;; the characters listed in CHARS are then overridden with
;; punctuation syntax (".").
270(modify-category-entry (make-char 'ethiopic) ?e)
c23d3872
KH
271(modify-syntax-entry (make-char 'ethiopic) "w")
272(let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B)))
273 (while chars
274 (modify-syntax-entry (car chars) ".")
275 (setq chars (cdr chars))))
4ed46869
KH
276
277;; European character set (Latin-1,2,3,4,5)
278
;; For each Latin charset: the whole charset gets category `l', and
;; code 160 additionally gets the space category used for indentation.
(let ((latin-charsets '(latin-iso8859-1
                        latin-iso8859-2
                        latin-iso8859-3
                        latin-iso8859-4
                        latin-iso8859-9
                        latin-iso8859-14
                        latin-iso8859-15))
      charset)
  (while latin-charsets
    (setq charset (car latin-charsets))
    (modify-category-entry (make-char charset) ?l)
    (modify-category-entry (make-char charset 160) ?\ )
    (setq latin-charsets (cdr latin-charsets))))
504af7b2 294
4ed46869
KH
295;; Greek character set (ISO-8859-7)
296
;; Every ISO-8859-7 character gets category `g'.  Codes 182 through
;; 254 and code 160 get word syntax ("w"); three individual characters
;; are then given punctuation syntax (".").
;; NOTE(review): code 160 gets word syntax here, whereas the Cyrillic
;; section gives its code 160 whitespace syntax -- confirm this
;; difference is intended.
297(modify-category-entry (make-char 'greek-iso8859-7) ?g)
298
299(let ((c 182))
300 (while (< c 255)
301 (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
302 (setq c (1+ c))))
303(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
304(modify-syntax-entry ?\e,F7\e(B ".")
305(modify-syntax-entry ?\e,F;\e(B ".")
306(modify-syntax-entry ?\e,F=\e(B ".")
4b7c7a40
DL
;; Register ISO-8859-7 letter pairs in the standard case table.  Each
;; `set-case-syntax-pair' call names two characters and the table TBL;
;; presumably the first is the uppercase and the second the lowercase
;; form, per that function's (UC LC TABLE) convention -- confirm
;; against case-table.el.
307(let ((tbl (standard-case-table)))
308 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
309 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
310 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
311 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
312 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
313 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
314 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
315 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
316 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
317 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
318 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
319 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
320 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
321 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
322 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
323 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
324 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
325 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
326 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
327 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
328 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
329 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
330 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
331 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
332 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
333 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
334 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
335 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
336 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
337 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
338 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
339 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
340 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl))
4ed46869
KH
341
342;; Hebrew character set (ISO-8859-8)
343
;; Every ISO-8859-8 character gets category `w'.
(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)

;; Codes 224 through 250, and code 160 (NBSP), get word syntax.
(let ((code 224)
      (last-code 250))
  (while (<= code last-code)
    (modify-syntax-entry (make-char 'hebrew-iso8859-8 code) "w")
    (setq code (1+ code))))
(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
351
352;; Indian character set (IS 13194 and other Emacs original Indian charsets)
353
;; `indian-is13194' characters get category `i'; both the 1-column and
;; 2-column Indian charsets get category `I'.
;;
;; The DEFLIST loop then assigns per-character syntax and category.
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS is walked one
;; character at a time: an ordinary character receives SYNTAX and
;; CATEGORY itself, while `-' extends the run up to and including the
;; character after it, so "X-Y" covers every code from X through Y.
;; The range form relies on CH being left one past the last code
;; handled by the innermost loop.
354(modify-category-entry (make-char 'indian-is13194) ?i)
355(modify-category-entry (make-char 'indian-2-column) ?I)
356(modify-category-entry (make-char 'indian-1-column) ?I)
357
4a027a0d
KH
358(let ((deflist
359 '(;; chars syntax category
360 ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
361 ; chandrabindu, anuswar, visarga
362 ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
363 ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
364 ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
365 ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
366 ))
367 elm chars len syntax category to ch i)
368 (while deflist
369 (setq elm (car deflist))
370 (setq chars (car elm)
371 len (length chars)
372 syntax (nth 1 elm)
373 category (nth 2 elm)
374 i 0)
375 (while (< i len)
376 (if (= (aref chars i) ?-)
377 (setq i (1+ i)
378 to (aref chars i))
379 (setq ch (aref chars i)
380 to ch))
381 (while (<= ch to)
382 (modify-syntax-entry ch syntax)
383 (modify-category-entry ch category)
384 (setq ch (1+ ch)))
385 (setq i (1+ i)))
386 (setq deflist (cdr deflist))))
387
388
4ed46869
KH
389;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
390
;; Charset-wide categories for the Japanese charsets.  Each entry is
;; (CHARSET CATEGORY...); every listed category is added to the whole
;; charset.
(let ((specs '((katakana-jisx0201 ?k ?\|)
               (latin-jisx0201 ?r)
               (japanese-jisx0208 ?j ?\|)
               (japanese-jisx0212 ?j ?\|)))
      generic
      categories)
  (while specs
    (setq generic (make-char (car (car specs)))
          categories (cdr (car specs)))
    (while categories
      (modify-category-entry generic (car categories))
      (setq categories (cdr categories)))
    (setq specs (cdr specs))))
4ed46869
KH
398
399;; JISX0208
;; JISX0208 syntax.  The whole charset defaults to word syntax ("w");
;; rows 33, 34 and 40 are then made symbol constituents ("_"), after
;; which the characters listed in CHARS are switched back to word
;; syntax.  The remaining entries pair up bracket characters: each
;; opener gets open-parenthesis syntax naming its closer, and each
;; closer gets close-parenthesis syntax naming its opener.
400(modify-syntax-entry (make-char 'japanese-jisx0208) "w")
401(modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
402(modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
403(modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
404(let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
405 (while chars
406 (modify-syntax-entry (car chars) "w")
407 (setq chars (cdr chars))))
408(modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
409(modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
410(modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
411(modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
412(modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
413(modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
414(modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
415(modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
416(modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
417(modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
418
;; JISX0208 categories.  Rows 35..39 get `A', `H', `K', `G' and `Y'
;; respectively, and rows 48 through 126 get `C'.  A few individual
;; row-33 characters are then tagged by hand: one with `K' only, two
;; with both `K' and `H', and nine with `C'.
419(modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
420(modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
421(modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
422(modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
423(modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
424(let ((row 48))
425 (while (< row 127)
426 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
427 (setq row (1+ row))))
210dede6
KH
428(modify-category-entry ?\e$B!<\e(B ?K)
429(let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
4ed46869
KH
430 (while chars
431 (modify-category-entry (car chars) ?K)
432 (modify-category-entry (car chars) ?H)
433 (setq chars (cdr chars))))
434(let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
435 (while chars
436 (modify-category-entry (car chars) ?C)
437 (setq chars (cdr chars))))
438
439;; JISX0212
;; JISX0212: word syntax by default, rows 33..35 as symbol
;; constituents, and category `C' for the whole charset.
(let ((generic (make-char 'japanese-jisx0212))
      (row 33))
  (modify-syntax-entry generic "w")
  (while (<= row 35)
    (modify-syntax-entry (make-char 'japanese-jisx0212 row) "_")
    (setq row (1+ row)))
  (modify-category-entry generic ?C))
446
447;; JISX0201-Kana
;; JISX0201 katakana: word syntax ("w") for the whole charset, then
;; punctuation syntax (".") for the five characters listed in CHARS.
448(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
449(let ((chars '(?\e(I!\e(B ?\e(I"\e(B ?\e(I#\e(B ?\e(I$\e(B ?\e(I%\e(B)))
450 (while chars
451 (modify-syntax-entry (car chars) ".")
452 (setq chars (cdr chars))))
453
454;; Korean character set (KSC5601)
455
;; KSC5601 syntax: word by default, symbol constituent for the rows
;; listed below.
(modify-syntax-entry (make-char 'korean-ksc5601) "w")
(let ((symbol-rows '(33 34 38 39 40 41)))
  (while symbol-rows
    (modify-syntax-entry (make-char 'korean-ksc5601 (car symbol-rows)) "_")
    (setq symbol-rows (cdr symbol-rows))))

;; KSC5601 categories: `h' for the whole charset, plus one
;; script-specific category for each (ROW . CATEGORY) pair.
(modify-category-entry (make-char 'korean-ksc5601) ?h)
(let ((row-categories '((35 . ?A) (37 . ?G) (42 . ?H) (43 . ?K) (44 . ?Y))))
  (while row-categories
    (modify-category-entry
     (make-char 'korean-ksc5601 (car (car row-categories)))
     (cdr (car row-categories)))
    (setq row-categories (cdr row-categories))))
470
6eba8645
KH
471;; Lao character set
472
;; Every Lao character gets category `o'.
;;
;; The DEFLIST loop then assigns per-character syntax and category.
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS is walked one
;; character at a time: an ordinary character receives SYNTAX and
;; CATEGORY itself, while `-' extends the run up to and including the
;; character after it, so "X-Y" covers every code from X through Y.
;; The range form relies on CH being left one past the last code
;; handled by the innermost loop.
473(modify-category-entry (make-char 'lao) ?o)
474
475(let ((deflist '(;; chars syntax category
476 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
477 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
478 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
479 ("\e(1XY\e(B" "w" ?3) ; vowel lower
480 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
481 ("\e(1\\e(B" "w" ?9) ; semivowel lower
9765a2ba 482 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
d5f05a67 483 ("\e(1Of\e(B" "_" ?5) ; symbol
6eba8645
KH
484 ))
485 elm chars len syntax category to ch i)
486 (while deflist
487 (setq elm (car deflist))
488 (setq chars (car elm)
489 len (length chars)
490 syntax (nth 1 elm)
491 category (nth 2 elm)
492 i 0)
493 (while (< i len)
494 (if (= (aref chars i) ?-)
495 (setq i (1+ i)
4a027a0d
KH
496 to (aref chars i))
497 (setq ch (aref chars i)
6eba8645
KH
498 to ch))
499 (while (<= ch to)
500 (modify-syntax-entry ch syntax)
501 (modify-category-entry ch category)
502 (setq ch (1+ ch)))
4a027a0d 503 (setq i (1+ i)))
6eba8645
KH
504 (setq deflist (cdr deflist))))
505
4ed46869
KH
506;; Thai character set (TIS620)
507
;; Every TIS620 character gets category `t'.
;;
;; The DEFLIST loop then assigns per-character syntax and category.
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS is walked one
;; character at a time: an ordinary character receives SYNTAX and
;; CATEGORY itself, while `-' extends the run up to and including the
;; character after it, so "X-Y" covers every code from X through Y.
;; The range form relies on CH being left one past the last code
;; handled by the innermost loop.
508(modify-category-entry (make-char 'thai-tis620) ?t)
509
510(let ((deflist '(;; chars syntax category
511 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
512 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
513 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
514 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
515 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
9765a2ba
KH
516 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
517 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
4ed46869
KH
518 ))
519 elm chars len syntax category to ch i)
9395eb7c
KH
520 (while deflist
521 (setq elm (car deflist))
522 (setq chars (car elm)
523 len (length chars)
524 syntax (nth 1 elm)
525 category (nth 2 elm)
526 i 0)
527 (while (< i len)
528 (if (= (aref chars i) ?-)
529 (setq i (1+ i)
4a027a0d
KH
530 to (aref chars i))
531 (setq ch (aref chars i)
9395eb7c
KH
532 to ch))
533 (while (<= ch to)
534 (modify-syntax-entry ch syntax)
535 (modify-category-entry ch category)
536 (setq ch (1+ ch)))
4a027a0d 537 (setq i (1+ i)))
9395eb7c
KH
538 (setq deflist (cdr deflist))))
539
540;; Tibetan character set
541
16230888
KH
;; Both Tibetan charsets get category `q'.
;;
;; The DEFLIST loop then assigns per-character syntax and category.
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS is walked one
;; character at a time: an ordinary character receives SYNTAX and
;; CATEGORY itself, while `-' extends the run up to and including the
;; character after it, so "X-Y" covers every code from X through Y.
;; The range form relies on CH being left one past the last code
;; handled by the innermost loop.  Some characters appear in several
;; entries and so accumulate more than one category.
542(modify-category-entry (make-char 'tibetan) ?q)
543(modify-category-entry (make-char 'tibetan-1-column) ?q)
9395eb7c
KH
544
545(let ((deflist '(;; chars syntax category
e6f02372
KH
546 ("\e$(7"!\e(B-\e$(7"J"K\e(B" "w" ?0) ; consonant
547 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
9395eb7c
KH
548 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
549 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
550 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
551 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
e6f02372 552 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
9395eb7c
KH
553 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
554 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
555 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
556 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
557 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
558 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
e6f02372 559 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
9395eb7c
KH
560 ))
561 elm chars len syntax category to ch i)
4ed46869
KH
562 (while deflist
563 (setq elm (car deflist))
564 (setq chars (car elm)
565 len (length chars)
566 syntax (nth 1 elm)
567 category (nth 2 elm)
568 i 0)
569 (while (< i len)
570 (if (= (aref chars i) ?-)
571 (setq i (1+ i)
4a027a0d
KH
572 to (aref chars i))
573 (setq ch (aref chars i)
4ed46869
KH
574 to ch))
575 (while (<= ch to)
576 (modify-syntax-entry ch syntax)
577 (modify-category-entry ch category)
578 (setq ch (1+ ch)))
4a027a0d 579 (setq i (1+ i)))
4ed46869
KH
580 (setq deflist (cdr deflist))))
581
582;; Vietnamese character set
583
;; Both VISCII charsets: word syntax, category `v', and category `l'
;; so that they form words together with Latin characters.
(let ((viscii-charsets '(vietnamese-viscii-lower vietnamese-viscii-upper))
      generic)
  (while viscii-charsets
    (setq generic (make-char (car viscii-charsets)))
    (modify-syntax-entry generic "w")
    (modify-category-entry generic ?v)
    (modify-category-entry generic ?l)
    (setq viscii-charsets (cdr viscii-charsets))))

;; Pair code N of the upper charset with code N of the lower charset
;; in the standard case table, for N from 32 through 127.
(let ((case-table (standard-case-table))
      (code 32))
  (while (<= code 127)
    (set-case-syntax-pair (make-char 'vietnamese-viscii-upper code)
                          (make-char 'vietnamese-viscii-lower code)
                          case-table)
    (setq code (1+ code))))
601
4ed46869
KH
602\f
603;;; Setting word boundary.
604
;; Category pairs that join into one word, followed by the pairs that
;; force a word boundary (these concern 2-byte character sets).
(setq word-combining-categories
      '((?l . ?l))

      word-separating-categories
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)))                     ; Chinese - Katakana
777cfce6
KH
619
620\f
621;; For each character set, record the coding system best suited to
aaa9f206 622;; encoding it in the `preferred-coding-system' property.
777cfce6
KH
623
;; Store, for every (CHARSET . CODING-SYSTEM) pair below, CODING-SYSTEM
;; as the `preferred-coding-system' property of CHARSET.
(mapcar (function
         (lambda (pair)
           (put-charset-property (car pair)
                                 'preferred-coding-system
                                 (cdr pair))))
        '((latin-iso8859-1 . iso-latin-1)
          (latin-iso8859-2 . iso-latin-2)
          (latin-iso8859-3 . iso-latin-3)
          (latin-iso8859-4 . iso-latin-4)
          (thai-tis620 . thai-tis620)
          (greek-iso8859-7 . greek-iso-8bit)
          (arabic-iso8859-6 . iso-2022-7bit)
          (hebrew-iso8859-8 . hebrew-iso-8bit)
          (katakana-jisx0201 . japanese-shift-jis)
          (latin-jisx0201 . japanese-shift-jis)
          (cyrillic-iso8859-5 . cyrillic-iso-8bit)
          (latin-iso8859-9 . iso-latin-5)
          (japanese-jisx0208-1978 . iso-2022-jp)
          (chinese-gb2312 . cn-gb-2312)
          (japanese-jisx0208 . iso-2022-jp)
          (korean-ksc5601 . iso-2022-kr)
          (japanese-jisx0212 . iso-2022-jp)
          (chinese-cns11643-1 . iso-2022-cn)
          (chinese-cns11643-2 . iso-2022-cn)
          (chinese-big5-1 . chinese-big5)
          (chinese-big5-2 . chinese-big5)
          (chinese-sisheng . iso-2022-7bit)
          (ipa . iso-2022-7bit)
          (vietnamese-viscii-lower . vietnamese-viscii)
          (vietnamese-viscii-upper . vietnamese-viscii)
          (arabic-digit . iso-2022-7bit)
          (arabic-1-column . iso-2022-7bit)
          (ascii-right-to-left . iso-2022-7bit)
          (lao . lao)
          (arabic-2-column . iso-2022-7bit)
          (indian-is13194 . devanagari)
          (indian-1-column . devanagari)
          (tibetan-1-column . tibetan)
          (ethiopic . iso-2022-7bit)
          (chinese-cns11643-3 . iso-2022-cn)
          (chinese-cns11643-4 . iso-2022-cn)
          (chinese-cns11643-5 . iso-2022-cn)
          (chinese-cns11643-6 . iso-2022-cn)
          (chinese-cns11643-7 . iso-2022-cn)
          (indian-2-column . devanagari)
          (tibetan . tibetan)
          (latin-iso8859-14 . iso-latin-8)
          (latin-iso8859-15 . iso-latin-9)))
df0415c5
KH
670
671\f
98a663f1
KH
672;; Setup auto-fill-chars for charsets that should invoke auto-filling.
673;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
674;; property to the charsets.
df0415c5
KH
;; For each charset below, mark its generic character in
;; `auto-fill-chars' and set its `nospace-between-words' property.
(let ((fill-charsets '(katakana-jisx0201
                       japanese-jisx0208
                       japanese-jisx0212
                       chinese-gb2312
                       chinese-big5-1
                       chinese-big5-2))
      charset)
  (while fill-charsets
    (setq charset (car fill-charsets))
    (aset auto-fill-chars (make-char charset) t)
    (put-charset-property charset 'nospace-between-words t)
    (setq fill-charsets (cdr fill-charsets))))
777cfce6
KH
682
683;;; Local Variables:
684;;; coding: iso-2022-7bit
685;;; End:
686
687;;; end of characters.el