Some fixes to follow coding conventions.
[bpt/emacs.git] / lisp / international / characters.el
CommitLineData
4ed46869
KH
1;;; characters.el --- set syntax and category for multibyte characters
2
28636af6 3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
4ed46869
KH
5
6;; Keywords: multibyte character, character set, syntax, category
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869
KH
24
25;;; Commentary:
26
27;; This file contains multibyte characters. Save this file always in
4b16fa0c 28;; the coding system `iso-2022-7bit'.
4ed46869 29
49adf443
RS
30;; This file does not define the syntax for Latin-N character sets;
31;; those are defined by the files latin-N.el.
32
60370d40
PJ
33;;; Code:
34
4ed46869
KH
35;;; Predefined categories.
36
37;; For each character set.
38
39(define-category ?a "ASCII")
40(define-category ?l "Latin")
41(define-category ?t "Thai")
42(define-category ?g "Greek")
43(define-category ?b "Arabic")
44(define-category ?w "Hebrew")
45(define-category ?y "Cyrillic")
46(define-category ?k "Japanese katakana")
47(define-category ?r "Japanese roman")
48(define-category ?c "Chinese")
49(define-category ?j "Japanese")
50(define-category ?h "Korean")
51(define-category ?e "Ethiopic (Ge'ez)")
52(define-category ?v "Vietnamese")
53(define-category ?i "Indian")
6eba8645 54(define-category ?o "Lao")
9395eb7c 55(define-category ?q "Tibetan")
4ed46869
KH
56
57;; For each group (row) of 2-byte character sets.
58
94487c4e 59(define-category ?A "Alpha-numeric characters of 2-byte character sets")
4ed46869 60(define-category ?C "Chinese (Han) characters of 2-byte character sets")
94487c4e 61(define-category ?G "Greek characters of 2-byte character sets")
4ed46869
KH
62(define-category ?H "Japanese Hiragana characters of 2-byte character sets")
63(define-category ?K "Japanese Katakana characters of 2-byte character sets")
64(define-category ?N "Korean Hangul characters of 2-byte character sets")
94487c4e 65(define-category ?Y "Cyrillic characters of 2-byte character sets")
4ed46869
KH
66(define-category ?I "Indian Glyphs")
67
68;; For phonetic classifications.
69
70(define-category ?0 "consonant")
9765a2ba 71(define-category ?1 "base (independent) vowel")
4ed46869
KH
72(define-category ?2 "upper diacritical mark (including upper vowel)")
73(define-category ?3 "lower diacritical mark (including lower vowel)")
74(define-category ?4 "tone mark")
9765a2ba 75(define-category ?5 "symbol")
4ed46869
KH
76(define-category ?6 "digit")
77(define-category ?7 "vowel-modifying diacritical mark")
6eba8645
KH
78(define-category ?8 "vowel-signs")
79(define-category ?9 "semivowel lower")
4ed46869
KH
80
81;; For filling.
82(define-category ?| "While filling, we can break a line at this character.")
83
504af7b2 84;; For indentation calculation.
777cfce6
KH
85(define-category ?
86 "This character counts as a space for indentation purposes.")
504af7b2 87
94487c4e 88;; Keep the following for `kinsoku' processing. See comments in
4ed46869
KH
89;; kinsoku.el.
90(define-category ?> "A character which can't be placed at beginning of line.")
91(define-category ?< "A character which can't be placed at end of line.")
92
93\f
94;;; Setting syntax and category.
95
96;; ASCII
97
98(let ((ch 32))
99 (while (< ch 127) ; All ASCII characters have
100 (modify-category-entry ch ?a) ; the category `a' (ASCII)
101 (modify-category-entry ch ?l) ; and `l' (Latin).
102 (setq ch (1+ ch))))
103
104;; Arabic character set
105
106(let ((charsets '(arabic-iso8859-6
107 arabic-digit
108 arabic-1-column
109 arabic-2-column)))
110 (while charsets
111 (modify-syntax-entry (make-char (car charsets)) "w")
112 (modify-category-entry (make-char (car charsets)) ?b)
113 (setq charsets (cdr charsets))))
114
115;; Chinese character set (GB2312)
116
117(modify-syntax-entry (make-char 'chinese-gb2312) "w")
118(modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
119(modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
120(modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
121(modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
122(modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
123(modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
124(modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
125(modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
126(modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
127(modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
128(modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
129(modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
130(modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
131(modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
132(modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
133(modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
134(modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
135
136(modify-category-entry (make-char 'chinese-gb2312) ?c)
137(modify-category-entry (make-char 'chinese-gb2312) ?\|)
138(modify-category-entry (make-char 'chinese-gb2312 35) ?A)
139(modify-category-entry (make-char 'chinese-gb2312 36) ?H)
140(modify-category-entry (make-char 'chinese-gb2312 37) ?K)
141(modify-category-entry (make-char 'chinese-gb2312 38) ?G)
142(modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
4ed46869
KH
143(let ((row 48))
144 (while (< row 127)
145 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
146 (setq row (1+ row))))
147
148;; Chinese character set (BIG5)
149
150(let ((generic-big5-1-char (make-char 'chinese-big5-1))
151 (generic-big5-2-char (make-char 'chinese-big5-2)))
152 (modify-syntax-entry generic-big5-1-char "w")
153 (modify-syntax-entry generic-big5-2-char "w")
154
155 (modify-category-entry generic-big5-1-char ?c)
156 (modify-category-entry generic-big5-2-char ?c)
157
158 (modify-category-entry generic-big5-1-char ?C)
159 (modify-category-entry generic-big5-2-char ?C)
160
161 (modify-category-entry generic-big5-1-char ?\|)
162 (modify-category-entry generic-big5-2-char ?\|))
163
164
165;; Chinese character set (CNS11643)
166
167(let ((cns-list '(chinese-cns11643-1
168 chinese-cns11643-2
169 chinese-cns11643-3
170 chinese-cns11643-4
171 chinese-cns11643-5
172 chinese-cns11643-6
173 chinese-cns11643-7))
174 generic-char)
175 (while cns-list
176 (setq generic-char (make-char (car cns-list)))
177 (modify-syntax-entry generic-char "w")
178 (modify-category-entry generic-char ?c)
179 (modify-category-entry generic-char ?C)
180 (modify-category-entry generic-char ?|)
181 (setq cns-list (cdr cns-list))))
182
183;; Cyrillic character set (ISO-8859-5)
184
185(modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
186
28636af6 187(modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
4ed46869
KH
188(modify-syntax-entry ?\e,L-\e(B ".")
189(modify-syntax-entry ?\e,Lp\e(B ".")
190(modify-syntax-entry ?\e,L}\e(B ".")
28636af6
RS
191(let ((tbl (standard-case-table)))
192 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
193 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
194 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
195 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
196 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
197 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
198 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
199 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
200 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
201 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
202 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
203 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
204 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
205 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
206 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
207 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
208 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
209 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
210 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
211 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
212 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
213 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
214 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
215 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
216 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
217 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
218 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
219 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
220 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
221 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
222 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
223 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
224 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
225 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
226 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
227 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
228 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
229 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
230 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
231 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
232 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
233 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
234 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
235 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
236 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
237 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl))
4ed46869 238
9395eb7c
KH
239;; Devanagari character set
240
241(let ((deflist '(;; chars syntax category
242 ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
243 ; chandrabindu, anuswar, visarga
9765a2ba 244 ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
9395eb7c
KH
245 ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
246 ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
247 ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
248 ))
249 elm chars len syntax category to ch i)
250 (while deflist
251 (setq elm (car deflist))
252 (setq chars (car elm)
253 len (length chars)
254 syntax (nth 1 elm)
255 category (nth 2 elm)
256 i 0)
257 (while (< i len)
258 (if (= (aref chars i) ?-)
259 (setq i (1+ i)
4a027a0d
KH
260 to (aref chars i))
261 (setq ch (aref chars i)
9395eb7c
KH
262 to ch))
263 (while (<= ch to)
264 (modify-syntax-entry ch syntax)
265 (modify-category-entry ch category)
266 (setq ch (1+ ch)))
4a027a0d 267 (setq i (1+ i)))
9395eb7c
KH
268 (setq deflist (cdr deflist))))
269
4ed46869
KH
270;; Ethiopic character set
271
272(modify-category-entry (make-char 'ethiopic) ?e)
c23d3872
KH
273(modify-syntax-entry (make-char 'ethiopic) "w")
274(let ((chars '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B)))
275 (while chars
276 (modify-syntax-entry (car chars) ".")
277 (setq chars (cdr chars))))
4ed46869 278
4ed46869
KH
279;; Greek character set (ISO-8859-7)
280
281(modify-category-entry (make-char 'greek-iso8859-7) ?g)
282
283(let ((c 182))
284 (while (< c 255)
285 (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
286 (setq c (1+ c))))
287(modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
288(modify-syntax-entry ?\e,F7\e(B ".")
289(modify-syntax-entry ?\e,F;\e(B ".")
290(modify-syntax-entry ?\e,F=\e(B ".")
4b7c7a40
DL
291(let ((tbl (standard-case-table)))
292 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
293 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
294 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
295 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
296 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
297 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
298 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
299 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
300 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
301 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
302 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
303 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
304 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
305 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
306 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
307 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
308 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
309 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
310 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
311 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
312 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
313 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
314 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
315 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
316 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
317 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
318 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
319 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
320 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
321 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
322 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
323 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
324 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl))
4ed46869
KH
325
326;; Hebrew character set (ISO-8859-8)
327
328(modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
329
330(let ((c 224))
331 (while (< c 251)
332 (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
333 (setq c (1+ c))))
334(modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
335
336;; Indian character set (IS 13194 and other Emacs original Indian charsets)
337
338(modify-category-entry (make-char 'indian-is13194) ?i)
339(modify-category-entry (make-char 'indian-2-column) ?I)
340(modify-category-entry (make-char 'indian-1-column) ?I)
341
4a027a0d
KH
342(let ((deflist
343 '(;; chars syntax category
344 ("\e(5!"#\e(B" "w" ?7) ; vowel-modifying diacritical mark
345 ; chandrabindu, anuswar, visarga
346 ("\e(5$\e(B-\e(52\e(B" "w" ?1) ; base (independent) vowel
347 ("\e(53\e(B-\e(5X\e(B" "w" ?0) ; consonant
348 ("\e(5Z\e(B-\e(5g\e(B" "w" ?8) ; matra
349 ("\e(5q\e(B-\e(5z\e(B" "w" ?6) ; digit
350 ))
351 elm chars len syntax category to ch i)
352 (while deflist
353 (setq elm (car deflist))
354 (setq chars (car elm)
355 len (length chars)
356 syntax (nth 1 elm)
357 category (nth 2 elm)
358 i 0)
359 (while (< i len)
360 (if (= (aref chars i) ?-)
361 (setq i (1+ i)
362 to (aref chars i))
363 (setq ch (aref chars i)
364 to ch))
365 (while (<= ch to)
366 (modify-syntax-entry ch syntax)
367 (modify-category-entry ch category)
368 (setq ch (1+ ch)))
369 (setq i (1+ i)))
370 (setq deflist (cdr deflist))))
371
372
4ed46869
KH
373;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
374
375(modify-category-entry (make-char 'katakana-jisx0201) ?k)
226e4119 376(modify-category-entry (make-char 'katakana-jisx0201) ?j)
4ed46869
KH
377(modify-category-entry (make-char 'latin-jisx0201) ?r)
378(modify-category-entry (make-char 'japanese-jisx0208) ?j)
379(modify-category-entry (make-char 'japanese-jisx0212) ?j)
9395eb7c 380(modify-category-entry (make-char 'katakana-jisx0201) ?\|)
4ed46869 381(modify-category-entry (make-char 'japanese-jisx0208) ?\|)
9395eb7c 382(modify-category-entry (make-char 'japanese-jisx0212) ?\|)
4ed46869
KH
383
384;; JISX0208
385(modify-syntax-entry (make-char 'japanese-jisx0208) "w")
386(modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
387(modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
388(modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
389(let ((chars '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
390 (while chars
391 (modify-syntax-entry (car chars) "w")
392 (setq chars (cdr chars))))
393(modify-syntax-entry ?\\e$B!J\e(B "(\e$B!K\e(B")
394(modify-syntax-entry ?\\e$B!N\e(B "(\e$B!O\e(B")
395(modify-syntax-entry ?\\e$B!P\e(B "(\e$B!Q\e(B")
396(modify-syntax-entry ?\\e$B!V\e(B "(\e$B!W\e(B")
397(modify-syntax-entry ?\\e$B!X\e(B "(\e$B!Y\e(B")
398(modify-syntax-entry ?\\e$B!K\e(B ")\e$B!J\e(B")
399(modify-syntax-entry ?\\e$B!O\e(B ")\e$B!N\e(B")
400(modify-syntax-entry ?\\e$B!Q\e(B ")\e$B!P\e(B")
401(modify-syntax-entry ?\\e$B!W\e(B ")\e$B!V\e(B")
402(modify-syntax-entry ?\\e$B!Y\e(B ")\e$B!X\e(B")
403
404(modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
405(modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
406(modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
407(modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
408(modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
409(let ((row 48))
410 (while (< row 127)
411 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
412 (setq row (1+ row))))
210dede6
KH
413(modify-category-entry ?\e$B!<\e(B ?K)
414(let ((chars '(?\e$B!+\e(B ?\e$B!,\e(B)))
4ed46869
KH
415 (while chars
416 (modify-category-entry (car chars) ?K)
417 (modify-category-entry (car chars) ?H)
418 (setq chars (cdr chars))))
419(let ((chars '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B)))
420 (while chars
421 (modify-category-entry (car chars) ?C)
422 (setq chars (cdr chars))))
423
424;; JISX0212
425(modify-syntax-entry (make-char 'japanese-jisx0212) "w")
426(modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
427(modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
428(modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
429
430(modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
431
432;; JISX0201-Kana
433(modify-syntax-entry (make-char 'katakana-jisx0201) "w")
226e4119 434(let ((chars '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B)))
4ed46869
KH
435 (while chars
436 (modify-syntax-entry (car chars) ".")
437 (setq chars (cdr chars))))
438
226e4119
KH
439(modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B")
440(modify-syntax-entry ?\\e(I#\e(B "(\e(I"\e(B")
441
4ed46869
KH
442;; Korean character set (KSC5601)
443
444(modify-syntax-entry (make-char 'korean-ksc5601) "w")
445(modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
446(modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
447(modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
448(modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
449(modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
450(modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
451
452(modify-category-entry (make-char 'korean-ksc5601) ?h)
453(modify-category-entry (make-char 'korean-ksc5601 35) ?A)
454(modify-category-entry (make-char 'korean-ksc5601 37) ?G)
455(modify-category-entry (make-char 'korean-ksc5601 42) ?H)
456(modify-category-entry (make-char 'korean-ksc5601 43) ?K)
457(modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
458
d05cfa1f
KH
459;; Latin character set (latin-1,2,3,4,5,8,9)
460
461(modify-category-entry (make-char 'latin-iso8859-1) ?l)
462(modify-category-entry (make-char 'latin-iso8859-2) ?l)
463(modify-category-entry (make-char 'latin-iso8859-3) ?l)
464(modify-category-entry (make-char 'latin-iso8859-4) ?l)
465(modify-category-entry (make-char 'latin-iso8859-9) ?l)
466(modify-category-entry (make-char 'latin-iso8859-14) ?l)
467(modify-category-entry (make-char 'latin-iso8859-15) ?l)
468
469(modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
470(modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
471(modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
472(modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
473(modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
474(modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
475(modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
476
6eba8645
KH
477;; Lao character set
478
479(modify-category-entry (make-char 'lao) ?o)
480
481(let ((deflist '(;; chars syntax category
482 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
483 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
484 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
485 ("\e(1XY\e(B" "w" ?3) ; vowel lower
486 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
487 ("\e(1\\e(B" "w" ?9) ; semivowel lower
9765a2ba 488 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
d5f05a67 489 ("\e(1Of\e(B" "_" ?5) ; symbol
6eba8645
KH
490 ))
491 elm chars len syntax category to ch i)
492 (while deflist
493 (setq elm (car deflist))
494 (setq chars (car elm)
495 len (length chars)
496 syntax (nth 1 elm)
497 category (nth 2 elm)
498 i 0)
499 (while (< i len)
500 (if (= (aref chars i) ?-)
501 (setq i (1+ i)
4a027a0d
KH
502 to (aref chars i))
503 (setq ch (aref chars i)
6eba8645
KH
504 to ch))
505 (while (<= ch to)
506 (modify-syntax-entry ch syntax)
507 (modify-category-entry ch category)
508 (setq ch (1+ ch)))
4a027a0d 509 (setq i (1+ i)))
6eba8645
KH
510 (setq deflist (cdr deflist))))
511
4ed46869
KH
512;; Thai character set (TIS620)
513
514(modify-category-entry (make-char 'thai-tis620) ?t)
515
516(let ((deflist '(;; chars syntax category
517 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
518 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
519 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
520 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
521 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
9765a2ba
KH
522 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
523 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
4ed46869
KH
524 ))
525 elm chars len syntax category to ch i)
9395eb7c
KH
526 (while deflist
527 (setq elm (car deflist))
528 (setq chars (car elm)
529 len (length chars)
530 syntax (nth 1 elm)
531 category (nth 2 elm)
532 i 0)
533 (while (< i len)
534 (if (= (aref chars i) ?-)
535 (setq i (1+ i)
4a027a0d
KH
536 to (aref chars i))
537 (setq ch (aref chars i)
9395eb7c
KH
538 to ch))
539 (while (<= ch to)
540 (modify-syntax-entry ch syntax)
541 (modify-category-entry ch category)
542 (setq ch (1+ ch)))
4a027a0d 543 (setq i (1+ i)))
9395eb7c
KH
544 (setq deflist (cdr deflist))))
545
546;; Tibetan character set
547
16230888
KH
548(modify-category-entry (make-char 'tibetan) ?q)
549(modify-category-entry (make-char 'tibetan-1-column) ?q)
9395eb7c
KH
550
551(let ((deflist '(;; chars syntax category
e6f02372
KH
552 ("\e$(7"!\e(B-\e$(7"J"K\e(B" "w" ?0) ; consonant
553 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
9395eb7c
KH
554 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
555 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
556 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
557 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
e6f02372 558 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lowel vowel/modifier
9395eb7c
KH
559 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
560 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
561 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
562 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
563 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
564 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
e6f02372 565 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
9395eb7c
KH
566 ))
567 elm chars len syntax category to ch i)
4ed46869
KH
568 (while deflist
569 (setq elm (car deflist))
570 (setq chars (car elm)
571 len (length chars)
572 syntax (nth 1 elm)
573 category (nth 2 elm)
574 i 0)
575 (while (< i len)
576 (if (= (aref chars i) ?-)
577 (setq i (1+ i)
4a027a0d
KH
578 to (aref chars i))
579 (setq ch (aref chars i)
4ed46869
KH
580 to ch))
581 (while (<= ch to)
582 (modify-syntax-entry ch syntax)
583 (modify-category-entry ch category)
584 (setq ch (1+ ch)))
4a027a0d 585 (setq i (1+ i)))
4ed46869
KH
586 (setq deflist (cdr deflist))))
587
588;; Vietnamese character set
589
590(let ((lower (make-char 'vietnamese-viscii-lower))
591 (upper (make-char 'vietnamese-viscii-upper)))
592 (modify-syntax-entry lower "w")
593 (modify-syntax-entry upper "w")
594 (modify-category-entry lower ?v)
595 (modify-category-entry upper ?v)
596 (modify-category-entry lower ?l) ; To make a word with
597 (modify-category-entry upper ?l) ; latin characters.
598 )
599
e5dd1155
KH
600(let ((tbl (standard-case-table))
601 (i 32))
602 (while (< i 128)
603 (set-case-syntax-pair (make-char 'vietnamese-viscii-upper i)
604 (make-char 'vietnamese-viscii-lower i)
605 tbl)
606 (setq i (1+ i))))
607
d05cfa1f
KH
608;; Unicode (mule-unicode-0100-24ff)
609
85ef8ece
KH
610(let ((tbl (standard-case-table)) c)
611
612;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
613;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
614;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
615;; Thus we have to check language-environment to handle casing
616;; correctly. Currently only I<->i is available.
617
618;; case-syntax-pair's are not yet given for Latin Extendet-B
619
620 ;; Latin Extended-A, Latin Extended-B
621 (setq c #x0100)
622 (while (<= c #x0233)
d05cfa1f 623 (modify-category-entry (decode-char 'ucs c) ?l)
85ef8ece
KH
624 (and (or (<= c #x012e)
625 (and (>= c #x014a) (<= c #x0177)))
d05cfa1f
KH
626 (zerop (% c 2))
627 (set-case-syntax-pair
628 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
85ef8ece
KH
629 (and (>= c #x013a)
630 (<= c #x0148)
631 (zerop (% c 2))
632 (set-case-syntax-pair
633 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
d05cfa1f 634 (setq c (1+ c)))
85ef8ece
KH
635 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
636 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
637 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
638; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
d05cfa1f
KH
639 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
640 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
641 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
642
85ef8ece
KH
643 ;; Latin Extended Additional
644 (setq c #x1e00)
d05cfa1f
KH
645 (while (<= c #x1ef9)
646 (modify-category-entry (decode-char 'ucs c) ?l)
647 (and (zerop (% c 2))
648 (or (<= c #x1e94) (>= c #x1ea0))
649 (set-case-syntax-pair
650 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
651 (setq c (1+ c)))
652
85ef8ece
KH
653 ;; Greek
654 (setq c #x0370)
d05cfa1f
KH
655 (while (<= c #x03ff)
656 (modify-category-entry (decode-char 'ucs c) ?g)
657 (if (or (and (>= c #x0391) (<= c #x03a1))
658 (and (>= c #x03a3) (<= c #x03ab)))
659 (set-case-syntax-pair
660 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
661 (and (>= c #x03da)
662 (<= c #x03ee)
663 (zerop (% c 2))
664 (set-case-syntax-pair
665 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
666 (setq c (1+ c)))
667 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
668 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
669 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
670 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
671 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
672 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
673 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
674
85ef8ece
KH
675 ;; Greek Extended
676 (setq c #x1f00)
d05cfa1f
KH
677 (while (<= c #x1fff)
678 (modify-category-entry (decode-char 'ucs c) ?g)
679 (and (<= (logand c #x000f) 7)
680 (<= c #x1fa7)
681 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
682 (/= (logand c #x00f0) 7)
683 (set-case-syntax-pair
684 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
685 (setq c (1+ c)))
686 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
687 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
688 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
689 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
690 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
691 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
692 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
693 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
694 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
695 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
696 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
697 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
698 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
699 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
700 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
701 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
702 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
703 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
704 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
705 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
706 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
707 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
708 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
709 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
710
85ef8ece
KH
711 ;; cyrillic
712 (setq c #x0400)
d05cfa1f
KH
713 (while (<= c #x04ff)
714 (modify-category-entry (decode-char 'ucs c) ?y)
715 (and (>= c #x0400)
716 (<= c #x040f)
717 (set-case-syntax-pair
718 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
719 (and (>= c #x0410)
720 (<= c #x042f)
721 (set-case-syntax-pair
722 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
723 (and (zerop (% c 2))
724 (or (and (>= c #x0460) (<= c #x0480))
725 (and (>= c #x048c) (<= c #x04be))
726 (and (>= c #x04d0) (<= c #x04f4)))
727 (set-case-syntax-pair
728 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
729 (setq c (1+ c)))
730 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
731 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
732 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
733 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
734 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
735
85ef8ece
KH
736 ;; general punctuation
737 (setq c #x2000)
d05cfa1f
KH
738 (while (<= c #x200b)
739 (set-case-syntax c " " tbl)
740 (setq c (1+ c)))
741 (setq c #x2010)
742 (while (<= c #x2027)
743 (set-case-syntax c "_" tbl)
744 (setq c (1+ c)))
745 )
746
4ed46869
KH
747\f
748;;; Setting word boundary.
749
750(setq word-combining-categories
751 '((?l . ?l)))
752
753(setq word-separating-categories ; (2-byte character sets)
754 '((?A . ?K) ; Alpha numeric - Katakana
755 (?A . ?C) ; Alpha numeric - Chinese
756 (?H . ?A) ; Hiragana - Alpha numeric
757 (?H . ?K) ; Hiragana - Katakana
758 (?H . ?C) ; Hiragana - Chinese
759 (?K . ?A) ; Katakana - Alpha numeric
760 (?K . ?C) ; Katakana - Chinese
761 (?C . ?A) ; Chinese - Alpha numeric
762 (?C . ?K) ; Chinese - Katakana
763 ))
777cfce6
KH
764
765\f
766;; For each character set, put the information of the most proper
aaa9f206 767;; coding system to encode it by `preferred-coding-system' property.
777cfce6
KH
768
769(let ((l '((latin-iso8859-1 . iso-latin-1)
770 (latin-iso8859-2 . iso-latin-2)
771 (latin-iso8859-3 . iso-latin-3)
772 (latin-iso8859-4 . iso-latin-4)
773 (thai-tis620 . thai-tis620)
774 (greek-iso8859-7 . greek-iso-8bit)
775 (arabic-iso8859-6 . iso-2022-7bit)
776 (hebrew-iso8859-8 . hebrew-iso-8bit)
777 (katakana-jisx0201 . japanese-shift-jis)
778 (latin-jisx0201 . japanese-shift-jis)
779 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
780 (latin-iso8859-9 . iso-latin-5)
781 (japanese-jisx0208-1978 . iso-2022-jp)
782 (chinese-gb2312 . cn-gb-2312)
783 (japanese-jisx0208 . iso-2022-jp)
784 (korean-ksc5601 . iso-2022-kr)
785 (japanese-jisx0212 . iso-2022-jp)
786 (chinese-cns11643-1 . iso-2022-cn)
787 (chinese-cns11643-2 . iso-2022-cn)
788 (chinese-big5-1 . chinese-big5)
789 (chinese-big5-2 . chinese-big5)
790 (chinese-sisheng . iso-2022-7bit)
791 (ipa . iso-2022-7bit)
792 (vietnamese-viscii-lower . vietnamese-viscii)
793 (vietnamese-viscii-upper . vietnamese-viscii)
794 (arabic-digit . iso-2022-7bit)
795 (arabic-1-column . iso-2022-7bit)
796 (ascii-right-to-left . iso-2022-7bit)
797 (lao . lao)
798 (arabic-2-column . iso-2022-7bit)
799 (indian-is13194 . devanagari)
800 (indian-1-column . devanagari)
801 (tibetan-1-column . tibetan)
58cd41a3 802 (ethiopic . iso-2022-7bit)
777cfce6
KH
803 (chinese-cns11643-3 . iso-2022-cn)
804 (chinese-cns11643-4 . iso-2022-cn)
805 (chinese-cns11643-5 . iso-2022-cn)
806 (chinese-cns11643-6 . iso-2022-cn)
807 (chinese-cns11643-7 . iso-2022-cn)
808 (indian-2-column . devanagari)
7a860cf2
DL
809 (tibetan . tibetan)
810 (latin-iso8859-14 . iso-latin-8)
811 (latin-iso8859-15 . iso-latin-9))))
777cfce6 812 (while l
aaa9f206 813 (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l)))
777cfce6 814 (setq l (cdr l))))
df0415c5
KH
815
816\f
98a663f1
KH
817;; Setup auto-fill-chars for charsets that should invoke auto-filling.
818;; SPACE and NEWLIE are already set. Also put `nospace-between-words'
819;; property to the charsets.
df0415c5
KH
820(let ((l '(katakana-jisx0201
821 japanese-jisx0208 japanese-jisx0212
822 chinese-gb2312 chinese-big5-1 chinese-big5-2)))
823 (while l
824 (aset auto-fill-chars (make-char (car l)) t)
98a663f1 825 (put-charset-property (car l) 'nospace-between-words t)
df0415c5 826 (setq l (cdr l))))
777cfce6
KH
827
828;;; Local Variables:
829;;; coding: iso-2022-7bit
830;;; End:
831
60370d40 832;;; characters.el ends here