Fix previous change.
[bpt/emacs.git] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6
7 ;; Keywords: multibyte character, character set, syntax, category
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Always save this file in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33
34 ;;; Code:
35
36 ;; We must set utf-translate-cjk-mode to nil while loading this file
37 ;; to avoid translating CJK characters in decode-char.
38 (defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode) ; remember the value in effect before the mode is switched off below
39 (setq utf-translate-cjk-mode nil) ; off while this file loads; NOTE(review): presumably restored from the saved value later in the file -- confirm
40
41 ;;; Predefined categories.
42
43 ;; For each character set.
44
45 (define-category ?a "ASCII")
46 (define-category ?l "Latin")
47 (define-category ?t "Thai")
48 (define-category ?g "Greek")
49 (define-category ?b "Arabic")
50 (define-category ?w "Hebrew")
51 (define-category ?y "Cyrillic")
52 (define-category ?k "Japanese katakana")
53 (define-category ?r "Japanese roman")
54 (define-category ?c "Chinese")
55 (define-category ?j "Japanese")
56 (define-category ?h "Korean")
57 (define-category ?e "Ethiopic (Ge'ez)")
58 (define-category ?v "Vietnamese")
59 (define-category ?i "Indian")
60 (define-category ?o "Lao")
61 (define-category ?q "Tibetan")
62
63 ;; For each group (row) of 2-byte character sets.
64
65 (define-category ?A "Alpha-numeric characters of 2-byte character sets")
66 (define-category ?C "Chinese (Han) characters of 2-byte character sets")
67 (define-category ?G "Greek characters of 2-byte character sets")
68 (define-category ?H "Japanese Hiragana characters of 2-byte character sets")
69 (define-category ?K "Japanese Katakana characters of 2-byte character sets")
70 (define-category ?N "Korean Hangul characters of 2-byte character sets")
71 (define-category ?Y "Cyrillic characters of 2-byte character sets")
72 (define-category ?I "Indian Glyphs")
73
74 ;; For phonetic classifications.
75
76 (define-category ?0 "consonant")
77 (define-category ?1 "base (independent) vowel")
78 (define-category ?2 "upper diacritical mark (including upper vowel)")
79 (define-category ?3 "lower diacritical mark (including lower vowel)")
80 (define-category ?4 "tone mark")
81 (define-category ?5 "symbol")
82 (define-category ?6 "digit")
83 (define-category ?7 "vowel-modifying diacritical mark")
84 (define-category ?8 "vowel-signs")
85 (define-category ?9 "semivowel lower")
86
87 ;; For filling.
88 (define-category ?| "While filling, we can break a line at this character.")
89
90 ;; For indentation calculation.
91 (define-category ?\s
92 "This character counts as a space for indentation purposes.")
93
94 ;; Keep the following for `kinsoku' processing. See comments in
95 ;; kinsoku.el.
96 (define-category ?> "A character which can't be placed at beginning of line.")
97 (define-category ?< "A character which can't be placed at end of line.")
98
99 ;; Combining
100 (define-category ?^ "Combining diacritic or mark")
101 \f
102 ;;; Setting syntax and category.
103
104 ;; ASCII
105
106 ;; Characters 32 through 126 each get two categories: `a' (ASCII)
107 ;; and `l' (Latin).
108 (dotimes (offset (- 127 32))
109   (modify-category-entry (+ 32 offset) ?a)
110   (modify-category-entry (+ 32 offset) ?l))
111
112 ;; Arabic character set
113
114 ;; For each legacy Arabic charset, the character that `make-char'
115 ;; returns for the bare charset name is put into category `b' (Arabic).
116 (dolist (arabic-charset
117          '(arabic-iso8859-6 arabic-digit arabic-1-column arabic-2-column))
118   ;; The syntax assignment stays commented out; it is kept here for
119   ;; reference only:
120   ;; (modify-syntax-entry (make-char arabic-charset) "w")
121   (modify-category-entry (make-char arabic-charset) ?b))
122 ;; Category `b' (Arabic) is given to three inclusive UCS ranges:
123 ;;   #x600  .. #x6ff
124 ;;   #xfb50 .. #xfdff
125 ;;   #xfe70 .. #xfefe
126 (dolist (range '((#x600 . #x6ff) (#xfb50 . #xfdff) (#xfe70 . #xfefe)))
127   (let ((code (car range))
128         (end (cdr range)))
129     (while (<= code end)
130       (modify-category-entry (decode-char 'ucs code) ?b)
131       (setq code (1+ code)))))
134
135 ;; Chinese character set (GB2312)
136
137 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w")
138 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_")
139 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_")
140 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_")
141 (modify-syntax-entry ?\\e$A!2\e(B "(\e$A!3\e(B")
142 (modify-syntax-entry ?\\e$A!4\e(B "(\e$A!5\e(B")
143 (modify-syntax-entry ?\\e$A!6\e(B "(\e$A!7\e(B")
144 (modify-syntax-entry ?\\e$A!8\e(B "(\e$A!9\e(B")
145 (modify-syntax-entry ?\\e$A!:\e(B "(\e$A!;\e(B")
146 (modify-syntax-entry ?\\e$A!<\e(B "(\e$A!=\e(B")
147 (modify-syntax-entry ?\\e$A!>\e(B "(\e$A!?\e(B")
148 (modify-syntax-entry ?\\e$A#(\e(B "(\e$A#)\e(B")
149 (modify-syntax-entry ?\\e$A#{\e(B "(\e$A#}\e(B")
150 (modify-syntax-entry ?\\e$A#[\e(B "(\e$A#]\e(B")
151 (modify-syntax-entry ?\\e$A!3\e(B ")\e$A!2\e(B")
152 (modify-syntax-entry ?\\e$A!5\e(B ")\e$A!4\e(B")
153 (modify-syntax-entry ?\\e$A!7\e(B ")\e$A!6\e(B")
154 (modify-syntax-entry ?\\e$A!9\e(B ")\e$A!8\e(B")
155 (modify-syntax-entry ?\\e$A!;\e(B ")\e$A!:\e(B")
156 (modify-syntax-entry ?\\e$A!=\e(B ")\e$A!<\e(B")
157 (modify-syntax-entry ?\\e$A!?\e(B ")\e$A!>\e(B")
158 (modify-syntax-entry ?\\e$A#)\e(B ")\e$A#(\e(B")
159 (modify-syntax-entry ?\\e$A#}\e(B ")\e$A#{\e(B")
160 (modify-syntax-entry ?\\e$A#]\e(B ")\e$A#[\e(B")
161
162 ;; Every character of the string below gets punctuation syntax (".").
163 (mapc (lambda (punct) (modify-syntax-entry punct "."))
164       "\e$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d\e(B")
165
166 (modify-category-entry (make-char 'chinese-gb2312) ?c)
167 (modify-category-entry (make-char 'chinese-gb2312) ?\|)
168 (modify-category-entry (make-char 'chinese-gb2312 35) ?A)
169 (modify-category-entry (make-char 'chinese-gb2312 36) ?H)
170 (modify-category-entry (make-char 'chinese-gb2312 37) ?K)
171 (modify-category-entry (make-char 'chinese-gb2312 38) ?G)
172 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
173 ;; Rows 48 through 126 of chinese-gb2312 are given category `C'
174 ;; (Chinese (Han) characters of 2-byte character sets).
175 (dotimes (n (- 127 48))
176   (modify-category-entry (make-char 'chinese-gb2312 (+ 48 n)) ?C))
177
178 ;; Chinese character set (BIG5)
179
180 ;; Assign syntax to three stretches of Big5 characters.  Each entry is
181 ;; (FIRST-BIG5-CODE END-BIG5-CODE SYNTAX).  The character decoded from
182 ;; END-BIG5-CODE is itself excluded, because the loop test is `<'.
183 (dolist (span '((#xA141 #xA15D ".") (#xA1A5 #xA1AD ".") (#xA1AD #xA2AF "_")))
184   (let ((cursor (decode-big5-char (nth 0 span)))
185         (stop (decode-big5-char (nth 1 span))))
186     (while (< cursor stop)
187       (modify-syntax-entry cursor (nth 2 span))
188       (setq cursor (1+ cursor)))))
195
196 (let ((pairs "\e$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B")
197       (pos 0))
198   ;; PAIRS alternates opener, closer, opener, closer, ...; walk it two
199   ;; characters at a time and make each pair match the other.
200   (while (< (1+ pos) (length pairs))
201     (let ((opener (aref pairs pos)) (closer (aref pairs (1+ pos))))
202       (modify-syntax-entry opener (format "(%c" closer))
203       (modify-syntax-entry closer (format ")%c" opener)))
204     (setq pos (+ pos 2))))
203
204 ;; The characters `make-char' returns for the two bare Big5 charsets
205 ;; receive the same three categories: `c' (Chinese), `C' (Han characters
206 ;; of 2-byte character sets) and `|' (a line may be broken here).
207 (dolist (big5-charset '(chinese-big5-1 chinese-big5-2))
208   (let ((generic-char (make-char big5-charset)))
209     ;; (modify-syntax-entry generic-char "w")
210     (dolist (cat '(?c ?C ?\|))
211       (modify-category-entry generic-char cat))))
217
218
219 ;; Chinese character set (CNS11643)
220
221 ;; For each of the seven CNS11643 charsets, the character `make-char'
222 ;; returns for the bare charset is put into categories `c', `C' and `|'.
223 (dolist (plane '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
224                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
225                  chinese-cns11643-7))
226   (let ((plane-char (make-char plane)))
227     ;; (modify-syntax-entry plane-char "w")
228     (modify-category-entry plane-char ?c)
229     (modify-category-entry plane-char ?C)
230     (modify-category-entry plane-char ?|)))
236
237 (let ((brackets (append "\e$(G!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c\e(B" nil)))
238   ;; BRACKETS is the string as a list: opener, closer, opener, closer, ...
239   (while (cdr brackets)
240     (let ((opener (car brackets)) (closer (nth 1 brackets)))
241       (modify-syntax-entry opener (format "(%c" closer))
242       (modify-syntax-entry closer (format ")%c" opener)))
243     (setq brackets (nthcdr 2 brackets))))
244
245 ;; Cyrillic character set (ISO-8859-5)
246
247 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y)
248
249 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
250 (modify-syntax-entry ?\e,L-\e(B ".")
251 (modify-syntax-entry ?\e,Lp\e(B ".")
252 (modify-syntax-entry ?\e,L}\e(B ".")
253 (let ((tbl (standard-case-table)))
254 (set-case-syntax-pair ?\e,L!\e(B ?\e,Lq\e(B tbl)
255 (set-case-syntax-pair ?\e,L"\e(B ?\e,Lr\e(B tbl)
256 (set-case-syntax-pair ?\e,L#\e(B ?\e,Ls\e(B tbl)
257 (set-case-syntax-pair ?\e,L$\e(B ?\e,Lt\e(B tbl)
258 (set-case-syntax-pair ?\e,L%\e(B ?\e,Lu\e(B tbl)
259 (set-case-syntax-pair ?\e,L&\e(B ?\e,Lv\e(B tbl)
260 (set-case-syntax-pair ?\e,L'\e(B ?\e,Lw\e(B tbl)
261 (set-case-syntax-pair ?\e,L(\e(B ?\e,Lx\e(B tbl)
262 (set-case-syntax-pair ?\e,L)\e(B ?\e,Ly\e(B tbl)
263 (set-case-syntax-pair ?\e,L*\e(B ?\e,Lz\e(B tbl)
264 (set-case-syntax-pair ?\e,L+\e(B ?\e,L{\e(B tbl)
265 (set-case-syntax-pair ?\e,L,\e(B ?\e,L|\e(B tbl)
266 (set-case-syntax-pair ?\e,L.\e(B ?\e,L~\e(B tbl)
267 (set-case-syntax-pair ?\e,L/\e(B ?\e,L\7f\e(B tbl)
268 (set-case-syntax-pair ?\e,L0\e(B ?\e,LP\e(B tbl)
269 (set-case-syntax-pair ?\e,L1\e(B ?\e,LQ\e(B tbl)
270 (set-case-syntax-pair ?\e,L2\e(B ?\e,LR\e(B tbl)
271 (set-case-syntax-pair ?\e,L3\e(B ?\e,LS\e(B tbl)
272 (set-case-syntax-pair ?\e,L4\e(B ?\e,LT\e(B tbl)
273 (set-case-syntax-pair ?\e,L5\e(B ?\e,LU\e(B tbl)
274 (set-case-syntax-pair ?\e,L6\e(B ?\e,LV\e(B tbl)
275 (set-case-syntax-pair ?\e,L7\e(B ?\e,LW\e(B tbl)
276 (set-case-syntax-pair ?\e,L8\e(B ?\e,LX\e(B tbl)
277 (set-case-syntax-pair ?\e,L9\e(B ?\e,LY\e(B tbl)
278 (set-case-syntax-pair ?\e,L:\e(B ?\e,LZ\e(B tbl)
279 (set-case-syntax-pair ?\e,L;\e(B ?\e,L[\e(B tbl)
280 (set-case-syntax-pair ?\e,L<\e(B ?\e,L\\e(B tbl)
281 (set-case-syntax-pair ?\e,L=\e(B ?\e,L]\e(B tbl)
282 (set-case-syntax-pair ?\e,L>\e(B ?\e,L^\e(B tbl)
283 (set-case-syntax-pair ?\e,L?\e(B ?\e,L_\e(B tbl)
284 (set-case-syntax-pair ?\e,L@\e(B ?\e,L`\e(B tbl)
285 (set-case-syntax-pair ?\e,LA\e(B ?\e,La\e(B tbl)
286 (set-case-syntax-pair ?\e,LB\e(B ?\e,Lb\e(B tbl)
287 (set-case-syntax-pair ?\e,LC\e(B ?\e,Lc\e(B tbl)
288 (set-case-syntax-pair ?\e,LD\e(B ?\e,Ld\e(B tbl)
289 (set-case-syntax-pair ?\e,LE\e(B ?\e,Le\e(B tbl)
290 (set-case-syntax-pair ?\e,LF\e(B ?\e,Lf\e(B tbl)
291 (set-case-syntax-pair ?\e,LG\e(B ?\e,Lg\e(B tbl)
292 (set-case-syntax-pair ?\e,LH\e(B ?\e,Lh\e(B tbl)
293 (set-case-syntax-pair ?\e,LI\e(B ?\e,Li\e(B tbl)
294 (set-case-syntax-pair ?\e,LJ\e(B ?\e,Lj\e(B tbl)
295 (set-case-syntax-pair ?\e,LK\e(B ?\e,Lk\e(B tbl)
296 (set-case-syntax-pair ?\e,LL\e(B ?\e,Ll\e(B tbl)
297 (set-case-syntax-pair ?\e,LM\e(B ?\e,Lm\e(B tbl)
298 (set-case-syntax-pair ?\e,LN\e(B ?\e,Ln\e(B tbl)
299 (set-case-syntax-pair ?\e,LO\e(B ?\e,Lo\e(B tbl)
300 (set-case-syntax-pair ?\e$,1(!\e(B ?\e$,1(q\e(B tbl)
301 (set-case-syntax-pair ?\e$,1("\e(B ?\e$,1(r\e(B tbl)
302 (set-case-syntax-pair ?\e$,1(#\e(B ?\e$,1(s\e(B tbl)
303 (set-case-syntax-pair ?\e$,1($\e(B ?\e$,1(t\e(B tbl)
304 (set-case-syntax-pair ?\e$,1(%\e(B ?\e$,1(u\e(B tbl)
305 (set-case-syntax-pair ?\e$,1(&\e(B ?\e$,1(v\e(B tbl)
306 (set-case-syntax-pair ?\e$,1('\e(B ?\e$,1(w\e(B tbl)
307 (set-case-syntax-pair ?\e$,1((\e(B ?\e$,1(x\e(B tbl)
308 (set-case-syntax-pair ?\e$,1()\e(B ?\e$,1(y\e(B tbl)
309 (set-case-syntax-pair ?\e$,1(*\e(B ?\e$,1(z\e(B tbl)
310 (set-case-syntax-pair ?\e$,1(+\e(B ?\e$,1({\e(B tbl)
311 (set-case-syntax-pair ?\e$,1(,\e(B ?\e$,1(|\e(B tbl)
312 (set-case-syntax-pair ?\e$,1(.\e(B ?\e$,1(~\e(B tbl)
313 (set-case-syntax-pair ?\e$,1(/\e(B ?\e$,1(\7f\e(B tbl)
314 (set-case-syntax-pair ?\e$,1(0\e(B ?\e$,1(P\e(B tbl)
315 (set-case-syntax-pair ?\e$,1(1\e(B ?\e$,1(Q\e(B tbl)
316 (set-case-syntax-pair ?\e$,1(2\e(B ?\e$,1(R\e(B tbl)
317 (set-case-syntax-pair ?\e$,1(3\e(B ?\e$,1(S\e(B tbl)
318 (set-case-syntax-pair ?\e$,1(4\e(B ?\e$,1(T\e(B tbl)
319 (set-case-syntax-pair ?\e$,1(5\e(B ?\e$,1(U\e(B tbl)
320 (set-case-syntax-pair ?\e$,1(6\e(B ?\e$,1(V\e(B tbl)
321 (set-case-syntax-pair ?\e$,1(7\e(B ?\e$,1(W\e(B tbl)
322 (set-case-syntax-pair ?\e$,1(8\e(B ?\e$,1(X\e(B tbl)
323 (set-case-syntax-pair ?\e$,1(9\e(B ?\e$,1(Y\e(B tbl)
324 (set-case-syntax-pair ?\e$,1(:\e(B ?\e$,1(Z\e(B tbl)
325 (set-case-syntax-pair ?\e$,1(;\e(B ?\e$,1([\e(B tbl)
326 (set-case-syntax-pair ?\e$,1(<\e(B ?\e$,1(\\e(B tbl)
327 (set-case-syntax-pair ?\e$,1(=\e(B ?\e$,1(]\e(B tbl)
328 (set-case-syntax-pair ?\e$,1(>\e(B ?\e$,1(^\e(B tbl)
329 (set-case-syntax-pair ?\e$,1(?\e(B ?\e$,1(_\e(B tbl)
330 (set-case-syntax-pair ?\e$,1(@\e(B ?\e$,1(`\e(B tbl)
331 (set-case-syntax-pair ?\e$,1(A\e(B ?\e$,1(a\e(B tbl)
332 (set-case-syntax-pair ?\e$,1(B\e(B ?\e$,1(b\e(B tbl)
333 (set-case-syntax-pair ?\e$,1(C\e(B ?\e$,1(c\e(B tbl)
334 (set-case-syntax-pair ?\e$,1(D\e(B ?\e$,1(d\e(B tbl)
335 (set-case-syntax-pair ?\e$,1(E\e(B ?\e$,1(e\e(B tbl)
336 (set-case-syntax-pair ?\e$,1(F\e(B ?\e$,1(f\e(B tbl)
337 (set-case-syntax-pair ?\e$,1(G\e(B ?\e$,1(g\e(B tbl)
338 (set-case-syntax-pair ?\e$,1(H\e(B ?\e$,1(h\e(B tbl)
339 (set-case-syntax-pair ?\e$,1(I\e(B ?\e$,1(i\e(B tbl)
340 (set-case-syntax-pair ?\e$,1(J\e(B ?\e$,1(j\e(B tbl)
341 (set-case-syntax-pair ?\e$,1(K\e(B ?\e$,1(k\e(B tbl)
342 (set-case-syntax-pair ?\e$,1(L\e(B ?\e$,1(l\e(B tbl)
343 (set-case-syntax-pair ?\e$,1(M\e(B ?\e$,1(m\e(B tbl)
344 (set-case-syntax-pair ?\e$,1(N\e(B ?\e$,1(n\e(B tbl)
345 (set-case-syntax-pair ?\e$,1(O\e(B ?\e$,1(o\e(B tbl))
346
347 ;; Devanagari character set
348
349 ;;; Commented out since the categories appear not to be used anywhere
350 ;;; and word syntax is the default.
351 ;; (let ((deflist '(;; chars syntax category
352 ;; ("\e$(5!!!"!#\e(B" "w" ?7) ; vowel-modifying diacritical mark
353 ;; ; chandrabindu, anuswar, visarga
354 ;; ("\e$(5!$\e(B-\e$(5!2\e(B" "w" ?1) ; independent vowel
355 ;; ("\e$(5!3\e(B-\e$(5!X\e(B" "w" ?0) ; consonant
356 ;; ("\e$(5!Z\e(B-\e$(5!g\e(B" "w" ?8) ; matra
357 ;; ("\e$(5!q\e(B-\e$(5!z\e(B" "w" ?6) ; digit
358 ;; ;; Unicode equivalents
359 ;; ("\e$,15A5B5C\e(B" "w" ?7) ; vowel-modifying diacritical mark
360 ;; ; chandrabindu, anuswar, visarga
361 ;; ("\e$,15E\e(B-\e$,15M\e(B" "w" ?1) ; independent vowel
362 ;; ("\e$,15U\e(B-\e$,15y\e(B" "w" ?0) ; consonant
363 ;; ("\e$,15~\e(B-\e$,16)\e(B" "w" ?8) ; matra
364 ;; ("\e$,16F\e(B-\e$,16O\e(B" "w" ?6) ; digit
365 ;; ))
366 ;; elm chars len syntax category to ch i)
367 ;; (while deflist
368 ;; (setq elm (car deflist))
369 ;; (setq chars (car elm)
370 ;; len (length chars)
371 ;; syntax (nth 1 elm)
372 ;; category (nth 2 elm)
373 ;; i 0)
374 ;; (while (< i len)
375 ;; (if (= (aref chars i) ?-)
376 ;; (setq i (1+ i)
377 ;; to (aref chars i))
378 ;; (setq ch (aref chars i)
379 ;; to ch))
380 ;; (while (<= ch to)
381 ;; (modify-syntax-entry ch syntax)
382 ;; (modify-category-entry ch category)
383 ;; (setq ch (1+ ch)))
384 ;; (setq i (1+ i)))
385 ;; (setq deflist (cdr deflist))))
386
387 ;; Ethiopic character set
388
389 (modify-category-entry (make-char 'ethiopic) ?e)
390 ;; (modify-syntax-entry (make-char 'ethiopic) "w")
391 (dotimes (i (1+ (- #x137c #x1200)))
392 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
393 (dolist (punct '(?\e$(3$h\e(B ?\e$(3$i\e(B ?\e$(3$j\e(B ?\e$(3$k\e(B ?\e$(3$l\e(B ?\e$(3$m\e(B ?\e$(3$n\e(B ?\e$(3$o\e(B ?\e$(3%i\e(B ?\e$(3%t\e(B ?\e$(3%u\e(B ?\e$(3%v\e(B ?\e$(3%w\e(B ?\e$(3%x\e(B
394 ;; Unicode equivalents of the above:
395 ?\e$,1Q!\e(B ?\e$,1Q"\e(B ?\e$,1Q#\e(B ?\e$,1Q$\e(B ?\e$,1Q%\e(B ?\e$,1Q&\e(B ?\e$,1Q'\e(B ?\e$,1Q(\e(B ?\e$,3op\e(B ?\e$,3o{\e(B ?\e$,3o|\e(B ?\e$,3o}\e(B ?\e$,3o~\e(B ?\e$,3o\7f\e(B))
396   ;; Every character listed above gets punctuation syntax (".").
397   (modify-syntax-entry punct "."))
399
400 ;; Greek character set (ISO-8859-7)
401
402 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
403 ;; UCS #x370 through #x3ff (inclusive) are put into category `g' (Greek).
404 (dotimes (k (1+ (- #x3ff #x370)))
405   (modify-category-entry (decode-char 'ucs (+ #x370 k)) ?g))
407
408 ;; (let ((c 182))
409 ;; (while (< c 255)
410 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
411 ;; (setq c (1+ c))))
412 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
413 (modify-syntax-entry ?\e,F7\e(B ".")
414 (modify-syntax-entry ?\e,F;\e(B ".")
415 (modify-syntax-entry ?\e,F=\e(B ".")
416 (let ((tbl (standard-case-table)))
417 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
418 ;; in several cases.
419 (set-case-syntax ?\e,F!\e(B "." tbl)
420 (set-case-syntax ?\e,F"\e(B "." tbl)
421 (set-case-syntax ?\e,F&\e(B "." tbl)
422 (set-case-syntax ?\e,F&\e(B "_" tbl)
423 (set-case-syntax ?\e,F'\e(B "." tbl)
424 (set-case-syntax ?\e,F)\e(B "_" tbl)
425 (set-case-syntax ?\e,F+\e(B "." tbl)
426 (set-case-syntax ?\e,F,\e(B "_" tbl)
427 (set-case-syntax ?\e,F-\e(B "_" tbl)
428 (set-case-syntax ?\e,F/\e(B "." tbl)
429 (set-case-syntax ?\e,F0\e(B "_" tbl)
430 (set-case-syntax ?\e,F1\e(B "_" tbl)
431 ;; (set-case-syntax ?\e,F7\e(B "_" tbl)
432 ;; (set-case-syntax ?\e,F=\e(B "_" tbl)
433 (set-case-syntax-pair ?\e,FA\e(B ?\e,Fa\e(B tbl)
434 (set-case-syntax-pair ?\e,FB\e(B ?\e,Fb\e(B tbl)
435 (set-case-syntax-pair ?\e,FC\e(B ?\e,Fc\e(B tbl)
436 (set-case-syntax-pair ?\e,FD\e(B ?\e,Fd\e(B tbl)
437 (set-case-syntax-pair ?\e,FE\e(B ?\e,Fe\e(B tbl)
438 (set-case-syntax-pair ?\e,FF\e(B ?\e,Ff\e(B tbl)
439 (set-case-syntax-pair ?\e,FG\e(B ?\e,Fg\e(B tbl)
440 (set-case-syntax-pair ?\e,FH\e(B ?\e,Fh\e(B tbl)
441 (set-case-syntax-pair ?\e,FI\e(B ?\e,Fi\e(B tbl)
442 (set-case-syntax-pair ?\e,FJ\e(B ?\e,Fj\e(B tbl)
443 (set-case-syntax-pair ?\e,FK\e(B ?\e,Fk\e(B tbl)
444 (set-case-syntax-pair ?\e,FL\e(B ?\e,Fl\e(B tbl)
445 (set-case-syntax-pair ?\e,FM\e(B ?\e,Fm\e(B tbl)
446 (set-case-syntax-pair ?\e,FN\e(B ?\e,Fn\e(B tbl)
447 (set-case-syntax-pair ?\e,FO\e(B ?\e,Fo\e(B tbl)
448 (set-case-syntax-pair ?\e,FP\e(B ?\e,Fp\e(B tbl)
449 (set-case-syntax-pair ?\e,FQ\e(B ?\e,Fq\e(B tbl)
450 (set-case-syntax-pair ?\e,FS\e(B ?\e,Fs\e(B tbl)
451 (set-case-syntax-pair ?\e,FT\e(B ?\e,Ft\e(B tbl)
452 (set-case-syntax-pair ?\e,FU\e(B ?\e,Fu\e(B tbl)
453 (set-case-syntax-pair ?\e,FV\e(B ?\e,Fv\e(B tbl)
454 (set-case-syntax-pair ?\e,FW\e(B ?\e,Fw\e(B tbl)
455 (set-case-syntax-pair ?\e,FX\e(B ?\e,Fx\e(B tbl)
456 (set-case-syntax-pair ?\e,FY\e(B ?\e,Fy\e(B tbl)
457 (set-case-syntax-pair ?\e,FZ\e(B ?\e,Fz\e(B tbl)
458 (set-case-syntax-pair ?\e,F[\e(B ?\e,F{\e(B tbl)
459 (set-case-syntax-pair ?\e,F?\e(B ?\e,F~\e(B tbl)
460 (set-case-syntax-pair ?\e,F>\e(B ?\e,F}\e(B tbl)
461 (set-case-syntax-pair ?\e,F<\e(B ?\e,F|\e(B tbl)
462 (set-case-syntax-pair ?\e,F6\e(B ?\e,F\\e(B tbl)
463 (set-case-syntax-pair ?\e,F8\e(B ?\e,F]\e(B tbl)
464 (set-case-syntax-pair ?\e,F9\e(B ?\e,F^\e(B tbl)
465 (set-case-syntax-pair ?\e,F:\e(B ?\e,F_\e(B tbl)
466 ;; Unicode equivalents
467 (set-case-syntax-pair ?\e$,1&q\e(B ?\e$,1'1\e(B tbl)
468 (set-case-syntax-pair ?\e$,1&r\e(B ?\e$,1'2\e(B tbl)
469 (set-case-syntax-pair ?\e$,1&s\e(B ?\e$,1'3\e(B tbl)
470 (set-case-syntax-pair ?\e$,1&t\e(B ?\e$,1'4\e(B tbl)
471 (set-case-syntax-pair ?\e$,1&u\e(B ?\e$,1'5\e(B tbl)
472 (set-case-syntax-pair ?\e$,1&v\e(B ?\e$,1'6\e(B tbl)
473 (set-case-syntax-pair ?\e$,1&w\e(B ?\e$,1'7\e(B tbl)
474 (set-case-syntax-pair ?\e$,1&x\e(B ?\e$,1'8\e(B tbl)
475 (set-case-syntax-pair ?\e$,1&y\e(B ?\e$,1'9\e(B tbl)
476 (set-case-syntax-pair ?\e$,1&z\e(B ?\e$,1':\e(B tbl)
477 (set-case-syntax-pair ?\e$,1&{\e(B ?\e$,1';\e(B tbl)
478 (set-case-syntax-pair ?\e$,1&|\e(B ?\e$,1'<\e(B tbl)
479 (set-case-syntax-pair ?\e$,1&}\e(B ?\e$,1'=\e(B tbl)
480 (set-case-syntax-pair ?\e$,1&~\e(B ?\e$,1'>\e(B tbl)
481 (set-case-syntax-pair ?\e$,1&\7f\e(B ?\e$,1'?\e(B tbl)
482 (set-case-syntax-pair ?\e$,1' \e(B ?\e$,1'@\e(B tbl)
483 (set-case-syntax-pair ?\e$,1'!\e(B ?\e$,1'A\e(B tbl)
484 (set-case-syntax-pair ?\e$,1'#\e(B ?\e$,1'C\e(B tbl)
485 (set-case-syntax-pair ?\e$,1'$\e(B ?\e$,1'D\e(B tbl)
486 (set-case-syntax-pair ?\e$,1'%\e(B ?\e$,1'E\e(B tbl)
487 (set-case-syntax-pair ?\e$,1'&\e(B ?\e$,1'F\e(B tbl)
488 (set-case-syntax-pair ?\e$,1''\e(B ?\e$,1'G\e(B tbl)
489 (set-case-syntax-pair ?\e$,1'(\e(B ?\e$,1'H\e(B tbl)
490 (set-case-syntax-pair ?\e$,1')\e(B ?\e$,1'I\e(B tbl)
491 (set-case-syntax-pair ?\e$,1'*\e(B ?\e$,1'J\e(B tbl)
492 (set-case-syntax-pair ?\e$,1'+\e(B ?\e$,1'K\e(B tbl)
493 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
494 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
495 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
496 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
497 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
498 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
499 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl))
500
501 ;; Hebrew character set (ISO-8859-8)
502
503 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w)
504 ;; UCS #x591 through #x5f4 (inclusive) are put into category `w' (Hebrew).
505 (dotimes (k (1+ (- #x5f4 #x591)))
506   (modify-category-entry (decode-char 'ucs (+ #x591 k)) ?w))
508
509 (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
510 (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
511 (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
512 (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
513 (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
514 (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
515 (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
516
517 ;; (let ((c 224))
518 ;; (while (< c 251)
519 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
520 ;; (setq c (1+ c))))
521 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
522
523 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
524
525 (modify-category-entry (make-char 'indian-is13194) ?i)
526 (modify-category-entry (make-char 'indian-2-column) ?I)
527 (modify-category-entry (make-char 'indian-glyph) ?I)
528 ;; Unicode Devanagari block
529 ;; UCS #x901 through #x970 (inclusive) are put into category `i' (Indian).
530 (dotimes (k (1+ (- #x970 #x901)))
531   (modify-category-entry (decode-char 'ucs (+ #x901 k)) ?i))
533
534 (let ((phonetic-spans '(;; FIRST LAST CATEGORY (offsets added to a base below)
535 (#x01 #x03 ?7) ; vowel-modifying diacritical mark
536 (#x05 #x14 ?1) ; base (independent) vowel
537 (#x15 #x39 ?0) ; consonant
538 (#x3e #x4d ?8) ; vowel-signs
539 (#x51 #x54 ?4) ; tone mark
540 (#x58 #x5f ?0) ; consonant
541 (#x60 #x61 ?1) ; base (independent) vowel
542 (#x62 #x63 ?8) ; vowel-signs
543 (#x66 #x6f ?6) ; digit
544 )))
545   ;; The same offset table is applied on top of each of these nine UCS
546   ;; base values; FIRST and LAST are both inclusive.
547   (dolist (block-base '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
548     (dolist (span phonetic-spans)
549       (let ((code (+ block-base (nth 0 span)))
550             (last-code (+ block-base (nth 1 span))))
551         (while (<= code last-code)
552           (modify-category-entry (decode-char 'ucs code) (nth 2 span))
553           (setq code (1+ code)))))))
553
554 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
555
556 (modify-category-entry (make-char 'katakana-jisx0201) ?k)
557 (modify-category-entry (make-char 'katakana-jisx0201) ?j)
558 (modify-category-entry (make-char 'latin-jisx0201) ?r)
559 (modify-category-entry (make-char 'japanese-jisx0208) ?j)
560 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
561 (modify-category-entry (make-char 'katakana-jisx0201) ?\|)
562 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
563 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
564
565 ;; Unicode equivalents of JISX0201-kana
566 (dotimes (n (1+ (- #xff9f #xff61)))
567   ;; UCS #xff61..#xff9f: decode once, then tag with `k' (Japanese
568   ;; katakana), `j' (Japanese) and `|' (a line may be broken here).
569   (let ((kana (decode-char 'ucs (+ #xff61 n))))
570     (modify-category-entry kana ?k)
571     (modify-category-entry kana ?j)
572     (modify-category-entry kana ?\|)))
572
573 ;; Katakana block
574 (dotimes (n (1+ (- #x30ff #x30a0)))
575   ;; UCS #x30a0..#x30ff.  `k' is used rather than `K': ?K is double
576   ;; width, ?k isn't specified.
577   (let ((kana (decode-char 'ucs (+ #x30a0 n))))
578     (modify-category-entry kana ?k)
579     (modify-category-entry kana ?j)
580     (modify-category-entry kana ?\|)))
581
582 ;; Hiragana block
583 (dotimes (n (1+ (- #x309f #x3040)))
584   (let ((hira (decode-char 'ucs (+ #x3040 n))))
585     ;; ?H is actually defined to be double width.  The `j' assignment
586     ;; remains disabled:  (modify-category-entry hira ?j)
587     (modify-category-entry hira ?H)
588     (modify-category-entry hira ?\|)))
590
591 ;; JISX0208
592 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
593 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
594 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
595 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
596 ;; These row-33 characters get word syntax ("w"), overriding the "_"
597 ;; that the whole of row 33 was given just above.
598 (dolist (word-char '(?\e$B!<\e(B ?\e$B!+\e(B ?\e$B!,\e(B ?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
599   (modify-syntax-entry word-char "w"))
600 (let ((pairs "\e$B!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![\e(B" )
601       (pos 0))
602   ;; PAIRS alternates opener, closer, opener, closer, ...
603   (while (< (1+ pos) (length pairs))
604     (let ((opener (aref pairs pos)) (closer (aref pairs (1+ pos))))
605       (modify-syntax-entry opener (format "(%c" closer))
606       (modify-syntax-entry closer (format ")%c" opener)))
607     (setq pos (+ pos 2))))
607
608 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
609 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
610 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
611 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
612 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
613 ;; Rows 48 through 126 of japanese-jisx0208 are given category `C'
614 ;; (Chinese (Han) characters of 2-byte character sets).
615 (dotimes (n (- 127 48))
616   (modify-category-entry (make-char 'japanese-jisx0208 (+ 48 n)) ?C))
617 (modify-category-entry ?\e$B!<\e(B ?K)
618 ;; The two characters below belong to both `K' and `H'.
619 (dolist (kana-mark '(?\e$B!+\e(B ?\e$B!,\e(B))
620   (modify-category-entry kana-mark ?K)
621   (modify-category-entry kana-mark ?H))
622 ;; The nine characters below belong to `C'.
623 (dolist (han-mark '(?\e$B!3\e(B ?\e$B!4\e(B ?\e$B!5\e(B ?\e$B!6\e(B ?\e$B!7\e(B ?\e$B!8\e(B ?\e$B!9\e(B ?\e$B!:\e(B ?\e$B!;\e(B))
624   (modify-category-entry han-mark ?C))
627
628 ;; JISX0212
629 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
630 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
631 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
632 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
633
634 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
635
636 ;; JISX0201-Kana
637 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
638 (dolist (punct '(?\e(I!\e(B ?\e(I$\e(B ?\e(I%\e(B
639 ;; Unicode:
640 ?\e$,3sa\e(B ?\e$,3sd\e(B ?\e$,3se\e(B))
641   ;; Each character listed above gets punctuation syntax (".").
642   (modify-syntax-entry punct "."))
644
645 (modify-syntax-entry ?\\e(I"\e(B "(\e(I#\e(B") ; opening bracket: class "(", matched by the character on the next line
646 (modify-syntax-entry ?\\e(I#\e(B ")\e(I"\e(B") ; closing bracket: class ")" (not "("), matched by the opener above
647
648 ;; Korean character set (KSC5601)
649
650 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
651 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
652 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
653 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
654 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
655 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
656 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
657
658 (modify-category-entry (make-char 'korean-ksc5601) ?h)
659 (modify-category-entry (make-char 'korean-ksc5601 35) ?A)
660 (modify-category-entry (make-char 'korean-ksc5601 37) ?G)
661 (modify-category-entry (make-char 'korean-ksc5601 42) ?H)
662 (modify-category-entry (make-char 'korean-ksc5601 43) ?K)
663 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
664
665 (let ((brackets (append "\e$(C!2!3!4!5!6!7!8!9!:!;!<!=#(#)#[#]#{#}\e(B" nil)))
666   ;; BRACKETS is the string as a list: opener, closer, opener, closer, ...
667   (while (cdr brackets)
668     (let ((opener (car brackets)) (closer (nth 1 brackets)))
669       (modify-syntax-entry opener (format "(%c" closer))
670       (modify-syntax-entry closer (format ")%c" opener)))
671     (setq brackets (nthcdr 2 brackets))))
672
673 ;; Latin character set (latin-1,2,3,4,5,8,9)
674
675 ;; Two passes over the same Latin charsets: first the character that
676 ;; `make-char' returns for the bare charset goes into category `l'
677 ;; (Latin); then code 160 of each charset goes into the category of
678 ;; characters that count as a space for indentation purposes.
679 (let ((latin-charsets '(latin-iso8859-1 latin-iso8859-2 latin-iso8859-3
680                         latin-iso8859-4 latin-iso8859-9 latin-iso8859-14
681                         latin-iso8859-15)))
682   (dolist (charset latin-charsets)
683     (modify-category-entry (make-char charset) ?l))
684   (dolist (charset latin-charsets)
685     (modify-category-entry (make-char charset 160) ?\ )))
690
691 ;; Lao character set
692
693 (modify-category-entry (make-char 'lao) ?o)
694 (let ((code #xe80)) ; UCS #xe80 through #xeff, inclusive, get category `o' (Lao)
695   (while (<= code #xeff) (modify-category-entry (decode-char 'ucs code) ?o) (setq code (1+ code))))
696
697 (let ((deflist '(;; chars syntax category -- an ASCII "-" inside CHARS extends a range up to the character after it
698 ("\e(1!\e(B-\e(1N\e(B" "w" ?0) ; consonant
699 ("\e(1PRS]`\e(B-\e(1d\e(B" "w" ?1) ; vowel base
700 ("\e(1QT\e(B-\e(1W[m\e(B" "w" ?2) ; vowel upper
701 ("\e(1XY\e(B" "w" ?3) ; vowel lower
702 ("\e(1h\e(B-\e(1l\e(B" "w" ?4) ; tone mark
703 ("\e(1\\e(B" "w" ?9) ; semivowel lower
704 ("\e(1p\e(B-\e(1y\e(B" "w" ?6) ; digit
705 ("\e(1Of\e(B" "_" ?5) ; symbol
706 ;; Unicode equivalents
707 ("\e$,1D!\e(B-\e$,1DN\e(B" "w" ?0) ; consonant
708 ("\e$,1DPDRDSD]D`\e(B-\e$,1Dd\e(B" "w" ?1) ; vowel base
709 ("\e$,1DQDT\e(B-\e$,1DWD[Dm\e(B" "w" ?2) ; vowel upper
710 ("\e$,1DXDY\e(B" "w" ?3) ; vowel lower
711 ("\e$,1Dh\e(B-\e$,1Dk\e(B" "w" ?4) ; tone mark -- NOTE(review): the non-Unicode tone-mark entry above runs h-l, this one stops at k; confirm intended
712 ("\e$,1D\D]\e(B" "w" ?9) ; semivowel lower -- NOTE(review): lists two characters where the non-Unicode entry has one, and the second also appears under "vowel base"; confirm
713 ("\e$,1Dp\e(B-\e$,1Dy\e(B" "w" ?6) ; digit
714 ("\e$,1DODf\e(B" "_" ?5) ; symbol
715 ))
716 elm chars len syntax category to ch i) ; working variables for the loops below, all initially nil
717 (while deflist ; one pass per (CHARS SYNTAX CATEGORY) entry
718 (setq elm (car deflist))
719 (setq chars (car elm)
720 len (length chars)
721 syntax (nth 1 elm)
722 category (nth 2 elm)
723 i 0)
724 (while (< i len) ; scan CHARS from left to right
725 (if (= (aref chars i) ?-)
726 (setq i (1+ i) ; "-": step over it and use the following character as the range end;
727 to (aref chars i)) ; CH was left one past the previous character, so the range continues from there
728 (setq ch (aref chars i) ; any other character forms a one-element range
729 to ch))
730 (while (<= ch to) ; every character code from CH through TO, inclusive
731 (unless (string-equal syntax "w") ; entries marked "w" only receive a category; their syntax is not touched
732 (modify-syntax-entry ch syntax))
733 (modify-category-entry ch category)
734 (setq ch (1+ ch)))
735 (setq i (1+ i)))
736 (setq deflist (cdr deflist))))
737
738 ;; Thai character set (TIS620)
739
740 (modify-category-entry (make-char 'thai-tis620) ?t)
741 (let ((code #xe00)) ; UCS #xe00 through #xe7f, inclusive, get category `t' (Thai)
742   (while (<= code #xe7f) (modify-category-entry (decode-char 'ucs code) ?t) (setq code (1+ code))))
743
744 (let ((deflist '(;; chars syntax category -- an ASCII "-" inside CHARS extends a range up to the character after it
745 ("\e,T!\e(B-\e,TCEG\e(B-\e,TN\e(B" "w" ?0) ; consonant
746 ("\e,TDFPRS`\e(B-\e,Te\e(B" "w" ?1) ; vowel base
747 ("\e,TQT\e(B-\e,TWgn\e(B" "w" ?2) ; vowel upper
748 ("\e,TX\e(B-\e,TZ\e(B" "w" ?3) ; vowel lower
749 ("\e,Th\e(B-\e,Tm\e(B" "w" ?4) ; tone mark
750 ("\e,Tp\e(B-\e,Ty\e(B" "w" ?6) ; digit
751 ("\e,TOf_oz{\e(B" "_" ?5) ; symbol
752 ;; Unicode equivalents
753 ("\e$,1Ba\e(B-\e$,1C#C%C'\e(B-\e$,1C.\e(B" "w" ?0) ; consonant
754 ("\e$,1C$C&C0C2C3C@\e(B-\e$,1CE\e(B" "w" ?1) ; vowel base
755 ("\e$,1C1C4\e(B-\e$,1C7CGCN\e(B" "w" ?2) ; vowel upper
756 ("\e$,1C8\e(B-\e$,1C:\e(B" "w" ?3) ; vowel lower
757 ("\e$,1CH\e(B-\e$,1CM\e(B" "w" ?4) ; tone mark
758 ("\e$,1CP\e(B-\e$,1CY\e(B" "w" ?6) ; digit
759 ("\e$,1C/CFC?COCZC[\e(B" "_" ?5) ; symbol
760 ))
761 elm chars len syntax category to ch i) ; working variables for the loops below, all initially nil
762 (while deflist ; one pass per (CHARS SYNTAX CATEGORY) entry
763 (setq elm (car deflist))
764 (setq chars (car elm)
765 len (length chars)
766 syntax (nth 1 elm)
767 category (nth 2 elm)
768 i 0)
769 (while (< i len) ; scan CHARS from left to right
770 (if (= (aref chars i) ?-)
771 (setq i (1+ i) ; "-": step over it and use the following character as the range end;
772 to (aref chars i)) ; CH was left one past the previous character, so the range continues from there
773 (setq ch (aref chars i) ; any other character forms a one-element range
774 to ch))
775 (while (<= ch to) ; every character code from CH through TO, inclusive
776 (unless (string-equal syntax "w") ; entries marked "w" only receive a category; their syntax is not touched
777 (modify-syntax-entry ch syntax))
778 (modify-category-entry ch category)
779 (setq ch (1+ ch)))
780 (setq i (1+ i)))
781 (setq deflist (cdr deflist))))
782
783 ;; Tibetan character set
784
;; Tibetan: put category ?q on the `tibetan' and `tibetan-1-column'
;; charsets and on each character that decode-char returns for the code
;; points #xf00 through #xfff.
785 (modify-category-entry (make-char 'tibetan) ?q)
786 (modify-category-entry (make-char 'tibetan-1-column) ?q)
787 (dotimes (i (1+ (- #xfff #xf00)))
788 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q))
789
;; Table-driven syntax/category setup for Tibetan.  Every entry is
;; (CHARS SYNTAX CATEGORY).  CHARS lists individual characters and
;; `FROM-TO' ranges; the loop below expands them one character at a time.
790 (let ((deflist '(;; chars syntax category
791 ("\e4\e$(7"!\e0"!\e1\e(B-\e4\e$(7"J\e0"J\e1\e4"K\e0"K\e1\e(B" "w" ?0) ; consonant
792 ("\e$(7#!\e(B-\e$(7#J#K#L#M!"!#\e(B" "w" ?0) ;
793 ("\e$(7$!\e(B-\e$(7$e\e(B" "w" ?0) ;
794 ("\e$(7%!\e(B-\e$(7%u\e(B" "w" ?0) ;
795 ("\e$(7"S"["\"]"^"a\e(B" "w" ?2) ; upper vowel
796 ("\e$(7"_"c"d"g"h"i"j"k"l\e(B" "w" ?2) ; upper modifier
797 ("\e$(7!I"Q"R"U"e!e!g\e(B" "w" ?3) ; lower vowel/modifier
798 ("\e$(7!P\e(B-\e$(7!Y!Z\e(B-\e$(7!c\e(B" "w" ?6) ; digit
799 ("\e$(7!;!=\e(B-\e$(7!B!D"`\e(B" "." ?|) ; line-break char
800 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?|) ;
801 ("\e$(7!8!;!=\e(B-\e$(7!B!D"`!m!d\e(B" "." ?>) ; prohibition
802 ("\e$(8!;!=!?!@!A!D"`\e(B" "." ?>) ;
803 ("\e$(7!0\e(B-\e$(7!:!l#R#S"f\e(B" "." ?<) ; prohibition
804 ("\e$(7!C!E\e(B-\e$(7!H!J\e(B-\e$(7!O!f!h\e(B-\e$(7!k!n!o#O#P\e(B-\e$(7#`\e(B" "." ?q) ; others
805
806 ;; Unicode version (not complete)
807 ("\e$,1F \e(B-\e$,1FIFJ\e(B" "w" ?0) ; consonant
808 ("\e$,1Fp\e(B-\e$,1G9G:G;G<\e(B" "w" ?0) ;
809 ("\e$,1FRFZF[F\F]F`\e(B" "w" ?2) ; upper vowel
810 ("\e$,1F^FbFcFfFgFhFiFjFk\e(B" "w" ?2) ; upper modifier
811 ("\e$,1EYFPFQFTFdEuEw\e(B" "w" ?3) ; lower vowel/modifier
812 ("\e$,1E`\e(B-\e$,1EiEj\e(B-\e$,1Es\e(B" "w" ?6) ; digit
813 ("\e$,1EKEM\e(B-\e$,1ERETF_\e(B" "." ?|) ; line-break char
814 ("\e$,1EHEKEM\e(B-\e$,1ERETF_E}Et\e(B" "." ?>) ; prohibition
815 ("\e$,1E@\e(B-\e$,1EJE|GAGBFe\e(B" "." ?<) ; prohibition
816 ("\e$,1ESEU\e(B-\e$,1EXEZ\e(B-\e$,1E_EvEx\e(B-\e$,1E{E~E\7fG>G?\e(B-\e$,1GO\e(B" "." ?q) ; others
817 ))
818 elm chars len syntax category to ch i)
;; Process the entries in order; a character that appears in several
;; entries receives each of the listed categories in turn.
819 (while deflist
820 (setq elm (car deflist))
821 (setq chars (car elm)
822 len (length chars)
823 syntax (nth 1 elm)
824 category (nth 2 elm)
825 i 0)
826 (while (< i len)
;; For `-', the character after it becomes the upper bound TO while CH
;; keeps its value (already one past the range start).  For anything
;; else, CH and TO are both set to that character.
827 (if (= (aref chars i) ?-)
828 (setq i (1+ i)
829 to (aref chars i))
830 (setq ch (aref chars i)
831 to ch))
;; Set the category on CH..TO, and the syntax too unless it is "w".
832 (while (<= ch to)
833 (unless (string-equal syntax "w")
834 (modify-syntax-entry ch syntax))
835 (modify-category-entry ch category)
836 (setq ch (1+ ch)))
837 (setq i (1+ i)))
838 (setq deflist (cdr deflist))))
839
840 ;; Vietnamese character set
841
;; Give both VISCII charsets the categories ?v and ?l.  The two
;; modify-syntax-entry calls are commented out, so no syntax entry is
;; changed here.
842 (let ((lower (make-char 'vietnamese-viscii-lower))
843 (upper (make-char 'vietnamese-viscii-upper)))
844 ;; (modify-syntax-entry lower "w")
845 ;; (modify-syntax-entry upper "w")
846 (modify-category-entry lower ?v)
847 (modify-category-entry upper ?v)
848 (modify-category-entry lower ?l) ; To make a word with
849 (modify-category-entry upper ?l) ; latin characters.
850 )
851
;; For every position I from 32 through 127, register the character at
;; position I of `vietnamese-viscii-upper' and the one at the same
;; position of `vietnamese-viscii-lower' as a case pair in the standard
;; case table.
852 (let ((tbl (standard-case-table))
853 (i 32))
854 (while (< i 128)
855 (set-case-syntax-pair (make-char 'vietnamese-viscii-upper i)
856 (make-char 'vietnamese-viscii-lower i)
857 tbl)
858 (setq i (1+ i))))
859
860 ;; Unicode (mule-unicode-0100-24ff)
861
;; TBL is the standard case table that every section of this form
;; updates; C is a scratch code-point counter that the loops reuse.
862 (let ((tbl (standard-case-table)) c)
863
864 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
865 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
866 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
867 ;; Thus we have to check language-environment to handle casing
868 ;; correctly. Currently only I<->i is available.
869
870 ;; Latin Extended-A, Latin Extended-B
;; Every code point #x0100..#x0233 gets category ?l.  Case pairs are
;; added only in the sub-ranges tested below.
871 (setq c #x0100)
872 (while (<= c #x0233)
873 (modify-category-entry (decode-char 'ucs c) ?l)
;; Up to #x012e and in #x014a..#x0177: even C is upper case, C+1 lower.
874 (and (or (<= c #x012e)
875 (and (>= c #x014a) (<= c #x0177)))
876 (zerop (% c 2))
877 (set-case-syntax-pair
878 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
;; In #x013a..#x0148 the parity is reversed: C-1 is upper, even C lower.
879 (and (>= c #x013a)
880 (<= c #x0148)
881 (zerop (% c 2))
882 (set-case-syntax-pair
883 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
884 (setq c (1+ c)))
;; Explicit Latin Extended-A pairs, each written as upper-case literal
;; followed by lower-case literal.  One pair is left commented out, with
;; the original note that its two members have different lengths.
885 (set-case-syntax-pair ?\e$,1 R\e(B ?\e$,1 S\e(B tbl)
886 (set-case-syntax-pair ?\e$,1 T\e(B ?\e$,1 U\e(B tbl)
887 (set-case-syntax-pair ?\e$,1 V\e(B ?\e$,1 W\e(B tbl)
888 ;;; (set-case-syntax-pair ?\e$,1!8\e(B ?\e,A\7f\e(B tbl) ; these two have different length!
889 (set-case-syntax-pair ?\e$,1!9\e(B ?\e$,1!:\e(B tbl)
890 (set-case-syntax-pair ?\e$,1!;\e(B ?\e$,1!<\e(B tbl)
891 (set-case-syntax-pair ?\e$,1!=\e(B ?\e$,1!>\e(B tbl)
892
893 ;; Latin Extended-B
;; Explicit pairs, each written as upper-case literal followed by
;; lower-case literal.  Where two consecutive calls name the same second
;; character, both first characters are mapped to that one lower-case
;; form.
894 (set-case-syntax-pair ?\e$,1!A\e(B ?\e$,1#S\e(B tbl)
895 (set-case-syntax-pair ?\e$,1!B\e(B ?\e$,1!C\e(B tbl)
896 (set-case-syntax-pair ?\e$,1!D\e(B ?\e$,1!E\e(B tbl)
897 (set-case-syntax-pair ?\e$,1!F\e(B ?\e$,1#T\e(B tbl)
898 (set-case-syntax-pair ?\e$,1!G\e(B ?\e$,1!H\e(B tbl)
899 (set-case-syntax-pair ?\e$,1!I\e(B ?\e$,1#V\e(B tbl)
900 (set-case-syntax-pair ?\e$,1!J\e(B ?\e$,1#W\e(B tbl)
901 (set-case-syntax-pair ?\e$,1!K\e(B ?\e$,1!L\e(B tbl)
902 (set-case-syntax-pair ?\e$,1!N\e(B ?\e$,1"=\e(B tbl)
903 (set-case-syntax-pair ?\e$,1!O\e(B ?\e$,1#Y\e(B tbl)
904 (set-case-syntax-pair ?\e$,1!P\e(B ?\e$,1#[\e(B tbl)
905 (set-case-syntax-pair ?\e$,1!Q\e(B ?\e$,1!R\e(B tbl)
906 (set-case-syntax-pair ?\e$,1!S\e(B ?\e$,1#`\e(B tbl)
907 (set-case-syntax-pair ?\e$,1!T\e(B ?\e$,1#c\e(B tbl)
908 (set-case-syntax-pair ?\e$,1!V\e(B ?\e$,1#i\e(B tbl)
909 (set-case-syntax-pair ?\e$,1!W\e(B ?\e$,1#h\e(B tbl)
910 (set-case-syntax-pair ?\e$,1!X\e(B ?\e$,1!Y\e(B tbl)
911 (set-case-syntax-pair ?\e$,1!\\e(B ?\e$,1#o\e(B tbl)
912 (set-case-syntax-pair ?\e$,1!]\e(B ?\e$,1#r\e(B tbl)
913 (set-case-syntax-pair ?\e$,1!_\e(B ?\e$,1#u\e(B tbl)
914 (set-case-syntax-pair ?\e$,1!`\e(B ?\e$,1!a\e(B tbl)
915 (set-case-syntax-pair ?\e$,1!b\e(B ?\e$,1!c\e(B tbl)
916 (set-case-syntax-pair ?\e$,1!d\e(B ?\e$,1!e\e(B tbl)
917 (set-case-syntax-pair ?\e$,1!f\e(B ?\e$,1$ \e(B tbl)
918 (set-case-syntax-pair ?\e$,1!g\e(B ?\e$,1!h\e(B tbl)
919 (set-case-syntax-pair ?\e$,1!i\e(B ?\e$,1$#\e(B tbl)
920 (set-case-syntax-pair ?\e$,1!l\e(B ?\e$,1!m\e(B tbl)
921 (set-case-syntax-pair ?\e$,1!n\e(B ?\e$,1$(\e(B tbl)
922 (set-case-syntax-pair ?\e$,1!o\e(B ?\e$,1!p\e(B tbl)
923 (set-case-syntax-pair ?\e$,1!q\e(B ?\e$,1$*\e(B tbl)
924 (set-case-syntax-pair ?\e$,1!r\e(B ?\e$,1$+\e(B tbl)
925 (set-case-syntax-pair ?\e$,1!s\e(B ?\e$,1!t\e(B tbl)
926 (set-case-syntax-pair ?\e$,1!u\e(B ?\e$,1!v\e(B tbl)
927 (set-case-syntax-pair ?\e$,1!w\e(B ?\e$,1$2\e(B tbl)
928 (set-case-syntax-pair ?\e$,1!x\e(B ?\e$,1!y\e(B tbl)
929 (set-case-syntax-pair ?\e$,1!|\e(B ?\e$,1!}\e(B tbl)
930 (set-case-syntax-pair ?\e$,1"$\e(B ?\e$,1"&\e(B tbl)
931 (set-case-syntax-pair ?\e$,1"%\e(B ?\e$,1"&\e(B tbl)
932 (set-case-syntax-pair ?\e$,1"'\e(B ?\e$,1")\e(B tbl)
933 (set-case-syntax-pair ?\e$,1"(\e(B ?\e$,1")\e(B tbl)
934 (set-case-syntax-pair ?\e$,1"*\e(B ?\e$,1",\e(B tbl)
935 (set-case-syntax-pair ?\e$,1"+\e(B ?\e$,1",\e(B tbl)
936 (set-case-syntax-pair ?\e$,1"-\e(B ?\e$,1".\e(B tbl)
937 (set-case-syntax-pair ?\e$,1"/\e(B ?\e$,1"0\e(B tbl)
938 (set-case-syntax-pair ?\e$,1"1\e(B ?\e$,1"2\e(B tbl)
939 (set-case-syntax-pair ?\e$,1"3\e(B ?\e$,1"4\e(B tbl)
940 (set-case-syntax-pair ?\e$,1"5\e(B ?\e$,1"6\e(B tbl)
941 (set-case-syntax-pair ?\e$,1"7\e(B ?\e$,1"8\e(B tbl)
942 (set-case-syntax-pair ?\e$,1"9\e(B ?\e$,1":\e(B tbl)
943 (set-case-syntax-pair ?\e$,1";\e(B ?\e$,1"<\e(B tbl)
944 (set-case-syntax-pair ?\e$,1">\e(B ?\e$,1"?\e(B tbl)
945 (set-case-syntax-pair ?\e$,1"@\e(B ?\e$,1"A\e(B tbl)
946 (set-case-syntax-pair ?\e$,1"B\e(B ?\e$,1"C\e(B tbl)
947 (set-case-syntax-pair ?\e$,1"D\e(B ?\e$,1"E\e(B tbl)
948 (set-case-syntax-pair ?\e$,1"F\e(B ?\e$,1"G\e(B tbl)
949 (set-case-syntax-pair ?\e$,1"H\e(B ?\e$,1"I\e(B tbl)
950 (set-case-syntax-pair ?\e$,1"J\e(B ?\e$,1"K\e(B tbl)
951 (set-case-syntax-pair ?\e$,1"L\e(B ?\e$,1"M\e(B tbl)
952 (set-case-syntax-pair ?\e$,1"N\e(B ?\e$,1"O\e(B tbl)
953 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
954 (set-case-syntax-pair ?\e$,1"Q\e(B ?\e$,1"S\e(B tbl)
955 (set-case-syntax-pair ?\e$,1"R\e(B ?\e$,1"S\e(B tbl)
956 (set-case-syntax-pair ?\e$,1"T\e(B ?\e$,1"U\e(B tbl)
957 (set-case-syntax-pair ?\e$,1"V\e(B ?\e$,1!U\e(B tbl)
958 (set-case-syntax-pair ?\e$,1"W\e(B ?\e$,1!\7f\e(B tbl)
959 (set-case-syntax-pair ?\e$,1"X\e(B ?\e$,1"Y\e(B tbl)
960 (set-case-syntax-pair ?\e$,1"Z\e(B ?\e$,1"[\e(B tbl)
961 (set-case-syntax-pair ?\e$,1"\\e(B ?\e$,1"]\e(B tbl)
962 (set-case-syntax-pair ?\e$,1"^\e(B ?\e$,1"_\e(B tbl)
963 (set-case-syntax-pair ?\e$,1"`\e(B ?\e$,1"a\e(B tbl)
964 (set-case-syntax-pair ?\e$,1"b\e(B ?\e$,1"c\e(B tbl)
965 (set-case-syntax-pair ?\e$,1"d\e(B ?\e$,1"e\e(B tbl)
966 (set-case-syntax-pair ?\e$,1"f\e(B ?\e$,1"g\e(B tbl)
967 (set-case-syntax-pair ?\e$,1"h\e(B ?\e$,1"i\e(B tbl)
968 (set-case-syntax-pair ?\e$,1"j\e(B ?\e$,1"k\e(B tbl)
969 (set-case-syntax-pair ?\e$,1"l\e(B ?\e$,1"m\e(B tbl)
970 (set-case-syntax-pair ?\e$,1"n\e(B ?\e$,1"o\e(B tbl)
971 (set-case-syntax-pair ?\e$,1"p\e(B ?\e$,1"q\e(B tbl)
972 (set-case-syntax-pair ?\e$,1"r\e(B ?\e$,1"s\e(B tbl)
973 (set-case-syntax-pair ?\e$,1"t\e(B ?\e$,1"u\e(B tbl)
974 (set-case-syntax-pair ?\e$,1"v\e(B ?\e$,1"w\e(B tbl)
975 (set-case-syntax-pair ?\e$,1"x\e(B ?\e$,1"y\e(B tbl)
976 (set-case-syntax-pair ?\e$,1"z\e(B ?\e$,1"{\e(B tbl)
977 (set-case-syntax-pair ?\e$,1"|\e(B ?\e$,1"}\e(B tbl)
978 (set-case-syntax-pair ?\e$,1"~\e(B ?\e$,1"\7f\e(B tbl)
979 (set-case-syntax-pair ?\e$,1#"\e(B ?\e$,1##\e(B tbl)
980 (set-case-syntax-pair ?\e$,1#$\e(B ?\e$,1#%\e(B tbl)
981 (set-case-syntax-pair ?\e$,1#&\e(B ?\e$,1#'\e(B tbl)
982 (set-case-syntax-pair ?\e$,1#(\e(B ?\e$,1#)\e(B tbl)
983 (set-case-syntax-pair ?\e$,1#*\e(B ?\e$,1#+\e(B tbl)
984 (set-case-syntax-pair ?\e$,1#,\e(B ?\e$,1#-\e(B tbl)
985 (set-case-syntax-pair ?\e$,1#.\e(B ?\e$,1#/\e(B tbl)
986 (set-case-syntax-pair ?\e$,1#0\e(B ?\e$,1#1\e(B tbl)
987 (set-case-syntax-pair ?\e$,1#2\e(B ?\e$,1#3\e(B tbl)
988
989 ;; Latin Extended Additional
;; Every code point #x1e00..#x1ef9 gets category ?l.  Even C is paired
;; as upper case with C+1 as lower case, except for the even code points
;; strictly between #x1e94 and #x1ea0.
990 (setq c #x1e00)
991 (while (<= c #x1ef9)
992 (modify-category-entry (decode-char 'ucs c) ?l)
993 (and (zerop (% c 2))
994 (or (<= c #x1e94) (>= c #x1ea0))
995 (set-case-syntax-pair
996 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
997 (setq c (1+ c)))
998
999 ;; Greek
;; Every code point #x0370..#x03ff gets category ?g.
1000 (setq c #x0370)
1001 (while (<= c #x03ff)
1002 (modify-category-entry (decode-char 'ucs c) ?g)
;; #x0391..#x03a1 and #x03a3..#x03ab: the lower-case partner is C+32.
;; #x03a2 is deliberately outside both ranges.
1003 (if (or (and (>= c #x0391) (<= c #x03a1))
1004 (and (>= c #x03a3) (<= c #x03ab)))
1005 (set-case-syntax-pair
1006 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
;; #x03da..#x03ee: even C is upper case, C+1 is lower case.
1007 (and (>= c #x03da)
1008 (<= c #x03ee)
1009 (zerop (% c 2))
1010 (set-case-syntax-pair
1011 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1012 (setq c (1+ c)))
;; Pairs that follow neither rule above, listed explicitly as
;; upper-case literal followed by lower-case literal.
1013 (set-case-syntax-pair ?\e$,1&f\e(B ?\e$,1',\e(B tbl)
1014 (set-case-syntax-pair ?\e$,1&h\e(B ?\e$,1'-\e(B tbl)
1015 (set-case-syntax-pair ?\e$,1&i\e(B ?\e$,1'.\e(B tbl)
1016 (set-case-syntax-pair ?\e$,1&j\e(B ?\e$,1'/\e(B tbl)
1017 (set-case-syntax-pair ?\e$,1&l\e(B ?\e$,1'L\e(B tbl)
1018 (set-case-syntax-pair ?\e$,1&n\e(B ?\e$,1'M\e(B tbl)
1019 (set-case-syntax-pair ?\e$,1&o\e(B ?\e$,1'N\e(B tbl)
1020
1021 ;; Armenian
;; For each C in #x531..#x556, pair C (upper) with C+#x30 (lower).  No
;; category is assigned in this section.
1022 (setq c #x531)
1023 (while (<= c #x556)
1024 (set-case-syntax-pair (decode-char 'ucs c)
1025 (decode-char 'ucs (+ c #x30)) tbl)
1026 (setq c (1+ c)))
1027
1028 ;; Greek Extended
;; Every code point #x1f00..#x1fff gets category ?g.  In addition, each
;; C up to #x1fa7 whose low nibble is 0..7 is registered as the
;; lower-case partner of C+8, with two exceptions: the code points
;; #x1f50, #x1f52, #x1f54 and #x1f56, and the whole #x1f70 row, whose
;; members are paired one by one by the explicit calls after this loop.
1029 (setq c #x1f00)
1030 (while (<= c #x1fff)
1031 (modify-category-entry (decode-char 'ucs c) ?g)
1032 (and (<= (logand c #x000f) 7)
1033 (<= c #x1fa7)
1034 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
;; (logand c #x00f0) is always a multiple of 16, so the row test has to
;; compare against #x70.  Comparing against 7 could never be false and
;; wrongly paired #x1f70..#x1f77 with #x1f78..#x1f7f.
1035 (/= (logand c #x00f0) #x70)
1036 (set-case-syntax-pair
1037 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
1038 (setq c (1+ c)))
;; Explicit Greek Extended pairs (upper-case literal first, lower-case
;; literal second) for characters whose partner is not at offset 8.
1039 (set-case-syntax-pair ?\e$,1qx\e(B ?\e$,1qp\e(B tbl)
1040 (set-case-syntax-pair ?\e$,1qy\e(B ?\e$,1qq\e(B tbl)
1041 (set-case-syntax-pair ?\e$,1qz\e(B ?\e$,1q0\e(B tbl)
1042 (set-case-syntax-pair ?\e$,1q{\e(B ?\e$,1q1\e(B tbl)
1043 (set-case-syntax-pair ?\e$,1q|\e(B ?\e$,1qs\e(B tbl)
1044 (set-case-syntax-pair ?\e$,1r(\e(B ?\e$,1q2\e(B tbl)
1045 (set-case-syntax-pair ?\e$,1r)\e(B ?\e$,1q3\e(B tbl)
1046 (set-case-syntax-pair ?\e$,1r*\e(B ?\e$,1q4\e(B tbl)
1047 (set-case-syntax-pair ?\e$,1r+\e(B ?\e$,1q5\e(B tbl)
1048 (set-case-syntax-pair ?\e$,1r,\e(B ?\e$,1r#\e(B tbl)
1049 (set-case-syntax-pair ?\e$,1r8\e(B ?\e$,1r0\e(B tbl)
1050 (set-case-syntax-pair ?\e$,1r9\e(B ?\e$,1r1\e(B tbl)
1051 (set-case-syntax-pair ?\e$,1r:\e(B ?\e$,1q6\e(B tbl)
1052 (set-case-syntax-pair ?\e$,1r;\e(B ?\e$,1q7\e(B tbl)
1053 (set-case-syntax-pair ?\e$,1rH\e(B ?\e$,1r@\e(B tbl)
1054 (set-case-syntax-pair ?\e$,1rI\e(B ?\e$,1rA\e(B tbl)
1055 (set-case-syntax-pair ?\e$,1rJ\e(B ?\e$,1q:\e(B tbl)
1056 (set-case-syntax-pair ?\e$,1rK\e(B ?\e$,1q;\e(B tbl)
1057 (set-case-syntax-pair ?\e$,1rL\e(B ?\e$,1rE\e(B tbl)
1058 (set-case-syntax-pair ?\e$,1rX\e(B ?\e$,1q8\e(B tbl)
1059 (set-case-syntax-pair ?\e$,1rY\e(B ?\e$,1q9\e(B tbl)
1060 (set-case-syntax-pair ?\e$,1rZ\e(B ?\e$,1q<\e(B tbl)
1061 (set-case-syntax-pair ?\e$,1r[\e(B ?\e$,1q=\e(B tbl)
1062 (set-case-syntax-pair ?\e$,1r\\e(B ?\e$,1rS\e(B tbl)
1063
1064 ;; cyrillic
;; Every code point #x0400..#x04ff gets category ?y.
1065 (setq c #x0400)
1066 (while (<= c #x04ff)
1067 (modify-category-entry (decode-char 'ucs c) ?y)
;; #x0400..#x040f: the lower-case partner is C+80.
1068 (and (>= c #x0400)
1069 (<= c #x040f)
1070 (set-case-syntax-pair
1071 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
;; #x0410..#x042f: the lower-case partner is C+32.
1072 (and (>= c #x0410)
1073 (<= c #x042f)
1074 (set-case-syntax-pair
1075 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
;; Even C in #x0460..#x0480, #x048c..#x04be and #x04d0..#x04f4: the
;; lower-case partner is C+1.
1076 (and (zerop (% c 2))
1077 (or (and (>= c #x0460) (<= c #x0480))
1078 (and (>= c #x048c) (<= c #x04be))
1079 (and (>= c #x04d0) (<= c #x04f4)))
1080 (set-case-syntax-pair
1081 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
1082 (setq c (1+ c)))
;; Remaining pairs, listed explicitly (upper-case literal first).
1083 (set-case-syntax-pair ?\e$,1*!\e(B ?\e$,1*"\e(B tbl)
1084 (set-case-syntax-pair ?\e$,1*#\e(B ?\e$,1*$\e(B tbl)
1085 (set-case-syntax-pair ?\e$,1*'\e(B ?\e$,1*(\e(B tbl)
1086 (set-case-syntax-pair ?\e$,1*+\e(B ?\e$,1*,\e(B tbl)
1087 (set-case-syntax-pair ?\e$,1*X\e(B ?\e$,1*Y\e(B tbl)
1088
1089 ;; general punctuation
;; #x2000..#x200b get syntax " ".
1090 (setq c #x2000)
1091 (while (<= c #x200b)
1092 (set-case-syntax (decode-char 'ucs c) " " tbl)
1093 (setq c (1+ c)))
;; #x2010..#x2016 get syntax "_".  C is not reset after this loop, so
;; the two loops that follow continue from where the previous one
;; stopped: #x2017..#x201f, then #x2020..#x2027.
1094 (setq c #x2010)
1095 (while (<= c #x2016)
1096 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1097 (setq c (1+ c)))
1098 ;; Punctuation syntax for quotation marks (like `)
1099 (while (<= c #x201f)
1100 (set-case-syntax (decode-char 'ucs c) "." tbl)
1101 (setq c (1+ c)))
1102 (while (<= c #x2027)
1103 (set-case-syntax (decode-char 'ucs c) "_" tbl)
1104 (setq c (1+ c)))
1105
1106 ;; Roman numerals
;; For each C in #x2160..#x216f, pair C (upper) with C+#x10 (lower).
1107 (setq c #x2160)
1108 (while (<= c #x216f)
1109 (set-case-syntax-pair (decode-char 'ucs c)
1110 (decode-char 'ucs (+ c #x10)) tbl)
1111 (setq c (1+ c)))
1112
1113 ;; Circled Latin
;; For each C in #x24b6..#x24cf, pair C (upper) with C+26 (lower) and
;; put category ?l on both characters.
1114 (setq c #x24b6)
1115 (while (<= c #x24cf)
1116 (set-case-syntax-pair (decode-char 'ucs c)
1117 (decode-char 'ucs (+ c 26)) tbl)
1118 (modify-category-entry (decode-char 'ucs c) ?l)
1119 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1120 (setq c (1+ c)))
1121
1122 ;; Fullwidth Latin
;; For each C in #xff21..#xff3a, pair C (upper) with C+#x20 (lower) and
;; put category ?l on both characters.
1123 (setq c #xff21)
1124 (while (<= c #xff3a)
1125 (set-case-syntax-pair (decode-char 'ucs c)
1126 (decode-char 'ucs (+ c #x20)) tbl)
1127 (modify-category-entry (decode-char 'ucs c) ?l)
1128 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1129 (setq c (1+ c)))
1130
1131 ;; Combining diacritics
;; Put category ?^ on every code point #x300..#x362; no case or syntax
;; entry is touched here.
1132 (setq c #x300)
1133 (while (<= c #x362)
1134 (modify-category-entry (decode-char 'ucs c) ?^)
1135 (setq c (1+ c)))
1136
1137 ;; Combining marks
;; Put category ?^ on every code point #x20d0..#x20e3.
1138 (setq c #x20d0)
1139 (while (<= c #x20e3)
1140 (modify-category-entry (decode-char 'ucs c) ?^)
1141 (setq c (1+ c)))
1142
1143 ;; Fixme: syntax for symbols &c
;; The parenthesis below closes the `let' that binds TBL and C.
1144 )
1145
;; Open/close delimiter pairs.  Each element of PAIRS is a two-character
;; string: the opening character followed by its closing character.  The
;; trailing comments give the corresponding Unicode code points.
1146 (let ((pairs
1147 '("\e$,1sEsF\e(B" ; U+2045 U+2046
1148 "\e$,1s}s~\e(B" ; U+207D U+207E
1149 "\e$,1t-t.\e(B" ; U+208D U+208E
1150 "\e$,1{){*\e(B" ; U+2329 U+232A
1151 "\e$,1|T|U\e(B" ; U+23B4 U+23B5
1152 "\e$,2&H&I\e(B" ; U+2768 U+2769
1153 "\e$,2&J&K\e(B" ; U+276A U+276B
1154 "\e$,2&L&M\e(B" ; U+276C U+276D
1155 "\e$,2&P&Q\e(B" ; U+2770 U+2771
1156 "\e$,2&R&S\e(B" ; U+2772 U+2773
1157 "\e$,2&T&U\e(B" ; U+2774 U+2775
1158 "\e$,2'f'g\e(B" ; U+27E6 U+27E7
1159 "\e$,2'h'i\e(B" ; U+27E8 U+27E9
1160 "\e$,2'j'k\e(B" ; U+27EA U+27EB
1161 "\e$,2,#,$\e(B" ; U+2983 U+2984
1162 "\e$,2,%,&\e(B" ; U+2985 U+2986
1163 "\e$,2,',(\e(B" ; U+2987 U+2988
1164 "\e$,2,),*\e(B" ; U+2989 U+298A
1165 "\e$,2,+,,\e(B" ; U+298B U+298C
1166 "\e$,2,-,.\e(B" ; U+298D U+298E
1167 "\e$,2,/,0\e(B" ; U+298F U+2990
1168 "\e$,2,1,2\e(B" ; U+2991 U+2992
1169 "\e$,2,3,4\e(B" ; U+2993 U+2994
1170 "\e$,2,5,6\e(B" ; U+2995 U+2996
1171 "\e$,2,7,8\e(B" ; U+2997 U+2998
1172 "\e$,2-<-=\e(B" ; U+29FC U+29FD
1173 "\e$,2=H=I\e(B" ; U+3008 U+3009
1174 "\e$,2=J=K\e(B" ; U+300A U+300B
1175 "\e$,2=L=M\e(B" ; U+300C U+300D
1176 "\e$,2=N=O\e(B" ; U+300E U+300F
1177 "\e$,2=P=Q\e(B" ; U+3010 U+3011
1178 "\e$,2=T=U\e(B" ; U+3014 U+3015
1179 "\e$,2=V=W\e(B" ; U+3016 U+3017
1180 "\e$,2=X=Y\e(B" ; U+3018 U+3019
1181 "\e$,2=Z=[\e(B" ; U+301A U+301B
1182 "\e$,3m~m\7f\e(B" ; U+FD3E U+FD3F
1183 "\e$,3pUpV\e(B" ; U+FE35 U+FE36
1184 "\e$,3pWpX\e(B" ; U+FE37 U+FE38
1185 "\e$,3pYpZ\e(B" ; U+FE39 U+FE3A
1186 "\e$,3p[p\\e(B" ; U+FE3B U+FE3C
1187 "\e$,3p]p^\e(B" ; U+FE3D U+FE3E
1188 "\e$,3p_p`\e(B" ; U+FE3F U+FE40
1189 "\e$,3papb\e(B" ; U+FE41 U+FE42
1190 "\e$,3pcpd\e(B" ; U+FE43 U+FE44
1191 "\e$,3pypz\e(B" ; U+FE59 U+FE5A
1192 "\e$,3p{p|\e(B" ; U+FE5B U+FE5C
1193 "\e$,3p}p~\e(B" ; U+FE5D U+FE5E
1194 "\e$,3rhri\e(B" ; U+FF08 U+FF09
1195 "\e$,3s;s=\e(B" ; U+FF3B U+FF3D
1196 "\e$,3s[s]\e(B" ; U+FF5B U+FF5D
1197 "\e$,3s_s`\e(B" ; U+FF5F U+FF60
1198 "\e$,3sbsc\e(B" ; U+FF62 U+FF63
1199 )))
;; The syntax descriptor is built with `string': "(" plus the closing
;; character for the opener, and ")" plus the opening character for the
;; closer, so each member names the other as its match.
1200 (dolist (elt pairs)
1201 (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
1202 (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))
1203
1204 \f
1205 ;;; Setting word boundary.
1206
;; The only combining rule is (?l . ?l).
;; NOTE(review): presumably this keeps adjacent characters that both
;; carry category ?l inside one word even when they come from different
;; charsets -- confirm against the variable's documentation.
1207 (setq word-combining-categories
1208 '((?l . ?l)))
1209
;; Ordered category pairs (BEFORE . AFTER); the per-line comments name
;; the categories involved.  Not every ordering is listed: there is no
;; (?K . ?H) or (?C . ?H) entry, for example.
;; NOTE(review): presumably a word boundary is recognized between two
;; adjacent characters matching one of these pairs -- confirm against
;; the variable's documentation.
1210 (setq word-separating-categories ; (2-byte character sets)
1211 '((?A . ?K) ; Alpha numeric - Katakana
1212 (?A . ?C) ; Alpha numeric - Chinese
1213 (?H . ?A) ; Hiragana - Alpha numeric
1214 (?H . ?K) ; Hiragana - Katakana
1215 (?H . ?C) ; Hiragana - Chinese
1216 (?K . ?A) ; Katakana - Alpha numeric
1217 (?K . ?C) ; Katakana - Chinese
1218 (?C . ?A) ; Chinese - Alpha numeric
1219 (?C . ?K) ; Chinese - Katakana
1220 ))
1221
1222 \f
1223 ;; For each character set, put the information of the most proper
1224 ;; coding system to encode it by `preferred-coding-system' property.
1225
;; L is an alist of (CHARSET . CODING-SYSTEM).  The loop at the end
;; stores each CODING-SYSTEM as the `preferred-coding-system' property
;; of its CHARSET.
1226 (let ((l '((latin-iso8859-1 . iso-latin-1)
1227 (latin-iso8859-2 . iso-latin-2)
1228 (latin-iso8859-3 . iso-latin-3)
1229 (latin-iso8859-4 . iso-latin-4)
1230 (thai-tis620 . thai-tis620)
1231 (greek-iso8859-7 . greek-iso-8bit)
1232 (arabic-iso8859-6 . iso-2022-7bit)
1233 (hebrew-iso8859-8 . hebrew-iso-8bit)
1234 (katakana-jisx0201 . japanese-shift-jis)
1235 (latin-jisx0201 . japanese-shift-jis)
1236 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
1237 (latin-iso8859-9 . iso-latin-5)
1238 (japanese-jisx0208-1978 . iso-2022-jp)
1239 (chinese-gb2312 . cn-gb-2312)
1240 (japanese-jisx0208 . iso-2022-jp)
1241 (korean-ksc5601 . iso-2022-kr)
1242 (japanese-jisx0212 . iso-2022-jp)
1243 (chinese-cns11643-1 . iso-2022-cn)
1244 (chinese-cns11643-2 . iso-2022-cn)
1245 (chinese-big5-1 . chinese-big5)
1246 (chinese-big5-2 . chinese-big5)
1247 (chinese-sisheng . iso-2022-7bit)
1248 (ipa . iso-2022-7bit)
1249 (vietnamese-viscii-lower . vietnamese-viscii)
1250 (vietnamese-viscii-upper . vietnamese-viscii)
1251 (arabic-digit . iso-2022-7bit)
1252 (arabic-1-column . iso-2022-7bit)
1253 (ascii-right-to-left . iso-2022-7bit)
1254 (lao . lao)
1255 (arabic-2-column . iso-2022-7bit)
1256 (indian-is13194 . devanagari)
1257 (indian-glyph . devanagari)
1258 (tibetan-1-column . tibetan)
1259 (ethiopic . iso-2022-7bit)
1260 (chinese-cns11643-3 . iso-2022-cn)
1261 (chinese-cns11643-4 . iso-2022-cn)
1262 (chinese-cns11643-5 . iso-2022-cn)
1263 (chinese-cns11643-6 . iso-2022-cn)
1264 (chinese-cns11643-7 . iso-2022-cn)
1265 (indian-2-column . devanagari)
1266 (tibetan . tibetan)
1267 (latin-iso8859-14 . iso-latin-8)
1268 (latin-iso8859-15 . iso-latin-9))))
;; Pop one (CHARSET . CODING-SYSTEM) cell per iteration.
1269 (while l
1270 (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l)))
1271 (setq l (cdr l))))
1272
1273 \f
1274 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
1275 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
1276 ;; property on the charsets.
;; For each listed charset, set the `auto-fill-chars' slot indexed by
;; (make-char CHARSET) to t and give the charset a non-nil
;; `nospace-between-words' property.
1277 (let ((l '(katakana-jisx0201
1278 japanese-jisx0208 japanese-jisx0212
1279 chinese-gb2312 chinese-big5-1 chinese-big5-2)))
1280 (while l
1281 (aset auto-fill-chars (make-char (car l)) t)
1282 (put-charset-property (car l) 'nospace-between-words t)
1283 (setq l (cdr l))))
1284
1285 \f
;; Restore the value of `utf-translate-cjk-mode' that was saved (and
;; replaced by nil) at the top of this file, then remove the temporary
;; variable that held it.
1286 (setq utf-translate-cjk-mode saved-utf-translate-cjk-mode)
1287 (makunbound 'saved-utf-translate-cjk-mode)
1288
1289 ;;; Local Variables:
1290 ;;; coding: iso-2022-7bit
1291 ;;; End:
1292
1293 ;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d
1294 ;;; characters.el ends here