Fix categories ?A and ?C.
[bpt/emacs.git] / lisp / international / characters.el
1 ;;; characters.el --- set syntax and category for multibyte characters
2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 ;; Copyright (C) 2001, 2002
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H13PRO009
9
10 ;; Keywords: multibyte character, character set, syntax, category
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
28
29 ;;; Commentary:
30
;; This file contains multibyte characters.  Always save this file in
;; the coding system `utf-8-emacs', as declared in the Local Variables
;; section at the end of the file.
33
34 ;; This file does not define the syntax for Latin-N character sets;
35 ;; those are defined by the files latin-N.el.
36
37 ;;; Code:
38
39 ;;; Predefined categories.
40
41 ;; For each character set.
42
;; One category per character set.  Each entry is (MNEMONIC . DOCSTRING);
;; the mnemonic is the character handed to `modify-category-entry' below.
(dolist (entry '((?a . "ASCII")
                 (?l . "Latin")
                 (?t . "Thai")
                 (?g . "Greek")
                 (?b . "Arabic")
                 (?w . "Hebrew")
                 (?y . "Cyrillic")
                 (?k . "Japanese katakana")
                 (?r . "Japanese roman")
                 (?c . "Chinese")
                 (?j . "Japanese")
                 (?h . "Korean")
                 (?e . "Ethiopic (Ge'ez)")
                 (?v . "Vietnamese")
                 (?i . "Indian")
                 (?o . "Lao")
                 (?q . "Tibetan")))
  (define-category (car entry) (cdr entry)))
60
61 ;; For each group (row) of 2-byte character sets.
62
;; Categories for groups (rows) of 2-byte character sets, defined in
;; the order listed.
(dolist (entry '((?A . "Alpha-numeric characters of 2-byte character sets")
                 (?C . "Chinese (Han) characters of 2-byte character sets")
                 (?G . "Greek characters of 2-byte character sets")
                 (?H . "Japanese Hiragana characters of 2-byte character sets")
                 (?K . "Japanese Katakana characters of 2-byte character sets")
                 (?N . "Korean Hangul characters of 2-byte character sets")
                 (?Y . "Cyrillic characters of 2-byte character sets")
                 (?I . "Indian Glyphs")))
  (define-category (car entry) (cdr entry)))
71
72 ;; For phonetic classifications.
73
;; Phonetic classes, keyed by the digits 0..9.
(dolist (entry '((?0 . "consonant")
                 (?1 . "base (independent) vowel")
                 (?2 . "upper diacritical mark (including upper vowel)")
                 (?3 . "lower diacritical mark (including lower vowel)")
                 (?4 . "tone mark")
                 (?5 . "symbol")
                 (?6 . "digit")
                 (?7 . "vowel-modifying diacritical mark")
                 (?8 . "vowel-signs")
                 (?9 . "semivowel lower")))
  (define-category (car entry) (cdr entry)))
84
85 ;; For filling.
(define-category ?| "While filling, we can break a line at this character.")

;; For indentation calculation.
;; The mnemonic of this category is the SPACE character.  It is written
;; as `?\ ' (backslash followed by a space) with the docstring on the
;; same line, so the literal cannot be destroyed by tools that strip
;; trailing whitespace.
(define-category ?\  "This character counts as a space for indentation purposes.")

;; Keep the following for `kinsoku' processing.  See comments in
;; kinsoku.el.
(define-category ?> "A character which can't be placed at beginning of line.")
(define-category ?< "A character which can't be placed at end of line.")

;; Combining
(define-category ?^ "Combining diacritic or mark")
99 \f
100 ;;; Setting syntax and category.
101
102 ;; ASCII
103
104 ;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
;; Codes 32..127 belong to both `a' (ASCII) and `l' (Latin).
(dolist (category '(?a ?l))
  (modify-category-entry '(32 . 127) category))
107
108 ;; Arabic character set
109
;; Every character of the Arabic charsets, plus three Unicode ranges,
;; gets the `b' (Arabic) category.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (map-charset-chars #'modify-category-entry charset ?b))
(dolist (range '((#x600 . #x6ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (modify-category-entry range ?b))
120
121 ;; Chinese characters (Unicode)
;; Both Unicode ranges receive, in this order, the categories
;; `C' (Han), `c' (Chinese) and `|' (line may be broken here).
(dolist (range '((#x3400 . #x9FAF)
                 (#xF900 . #xFAFF)))
  (dolist (category '(?C ?c ?|))
    (modify-category-entry range category)))
128
129 ;; Chinese character set (GB2312)
130
;; The code ranges #x2121..#x217E, #x2221..#x227E and #x2921..#x297E
;; get symbol syntax ("_").
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)
;; Bracket pairs.  These come after the range settings above, so a
;; bracket lying in one of those ranges ends up with paren syntax.
;; Each opener is "(" plus its closer ...
(modify-syntax-entry ?\〔 "(〕")
(modify-syntax-entry ?\〈 "(〉")
(modify-syntax-entry ?\《 "(》")
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\『 "(』")
(modify-syntax-entry ?\〖 "(〗")
(modify-syntax-entry ?\【 "(】")
;; ... and each closer is ")" plus its opener.
(modify-syntax-entry ?\〕 ")〔")
(modify-syntax-entry ?\〉 ")〈")
(modify-syntax-entry ?\》 ")《")
(modify-syntax-entry ?\」 ")「")
(modify-syntax-entry ?\』 ")『")
(modify-syntax-entry ?\〗 ")〖")
(modify-syntax-entry ?\】 ")【")

;; The whole charset is `c' (Chinese) and allows line breaks (`|').
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?|)
;; Per-range categories, using the 2-byte row categories defined near
;; the top of this file (A, H, K, G, Y, C).
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
159
160 ;; Chinese character set (BIG5)
161
;; All of BIG5 is `c'; three code spans are additionally `C' (Han);
;; finally the whole charset allows line breaks (`|').
(map-charset-chars #'modify-category-entry 'big5 ?c)
(dolist (span '((#xA259 . #xA25F)
                (#xA440 . #xC67E)
                (#xC940 . #xF9DF)))
  (map-charset-chars #'modify-category-entry 'big5 ?C (car span) (cdr span)))
(map-charset-chars #'modify-category-entry 'big5 ?|)
167
168
169 ;; Chinese character set (CNS11643)
170
;; Every CNS plane is `c' and `|'.  `C' (Han) covers the whole plane,
;; except for plane 1 where it is limited to #x4421..#x7E7E.
(dolist (plane '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                 chinese-cns11643-7))
  (let ((han-limits (if (eq plane 'chinese-cns11643-1)
                        '(#x4421 #x7E7E))))
    (map-charset-chars #'modify-category-entry plane ?c)
    ;; With no limits the argument list simply ends after ?C.
    (apply #'map-charset-chars #'modify-category-entry plane ?C han-limits)
    (map-charset-chars #'modify-category-entry plane ?|)))
179
180 ;; Cyrillic character set (ISO-8859-5)
181
;; The numero sign is punctuation.
(modify-syntax-entry ?№ ".")
;; Register each (UPPER . LOWER) pair in the standard case table, in
;; the order listed.
(let ((case-table (standard-case-table)))
  (dolist (pair '((?Ё . ?ё) (?Ђ . ?ђ) (?Ѓ . ?ѓ) (?Є . ?є)
                  (?Ѕ . ?ѕ) (?І . ?і) (?Ї . ?ї) (?Ј . ?ј)
                  (?Љ . ?љ) (?Њ . ?њ) (?Ћ . ?ћ) (?Ќ . ?ќ)
                  (?Ў . ?ў) (?Џ . ?џ)
                  (?А . ?а) (?Б . ?б) (?В . ?в) (?Г . ?г)
                  (?Д . ?д) (?Е . ?е) (?Ж . ?ж) (?З . ?з)
                  (?И . ?и) (?Й . ?й) (?К . ?к) (?Л . ?л)
                  (?М . ?м) (?Н . ?н) (?О . ?о) (?П . ?п)
                  (?Р . ?р) (?С . ?с) (?Т . ?т) (?У . ?у)
                  (?Ф . ?ф) (?Х . ?х) (?Ц . ?ц) (?Ч . ?ч)
                  (?Ш . ?ш) (?Щ . ?щ) (?Ъ . ?ъ) (?Ы . ?ы)
                  (?Ь . ?ь) (?Э . ?э) (?Ю . ?ю) (?Я . ?я)))
    (set-case-syntax-pair (car pair) (cdr pair) case-table)))
230
231 ;; Ethiopic character set
232
;; Code points #x1200..#x137b get the `e' (Ethiopic) category.
(modify-category-entry '(#x1200 . #x137b) ?e)
;; Give punctuation syntax (".") to every character in the list.
;; NOTE(review): the last six list entries read as a bare `?' here;
;; their characters appear to have been lost in transit.  Confirm this
;; list against a pristine copy of the file before relying on it.
(let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ? ?)))
  (while chars
    (modify-syntax-entry (car chars) ".")
    (setq chars (cdr chars))))
;; Every character of the `ethiopic' charset is also category `e'.
(map-charset-chars #'modify-category-entry 'ethiopic ?e)
239
240 ;; Greek character set (ISO-8859-7)
241
;; Code points #x370..#x3ff are category `g' (Greek).
(modify-category-entry '(#x370 . #x3ff) ?g)

;; Register each (UPPER . LOWER) pair in the standard case table, in
;; the order listed.
(let ((case-table (standard-case-table)))
  (dolist (pair '((?Α . ?α) (?Β . ?β) (?Γ . ?γ) (?Δ . ?δ)
                  (?Ε . ?ε) (?Ζ . ?ζ) (?Η . ?η) (?Θ . ?θ)
                  (?Ι . ?ι) (?Κ . ?κ) (?Λ . ?λ) (?Μ . ?μ)
                  (?Ν . ?ν) (?Ξ . ?ξ) (?Ο . ?ο) (?Π . ?π)
                  (?Ρ . ?ρ) (?Σ . ?σ) (?Τ . ?τ) (?Υ . ?υ)
                  (?Φ . ?φ) (?Χ . ?χ) (?Ψ . ?ψ) (?Ω . ?ω)
                  (?Ϊ . ?ϊ) (?Ϋ . ?ϋ) (?Ώ . ?ώ) (?Ύ . ?ύ)
                  (?Ό . ?ό) (?Ά . ?ά) (?Έ . ?έ) (?Ή . ?ή)
                  (?Ί . ?ί)))
    (set-case-syntax-pair (car pair) (cdr pair) case-table)))
278
279 ;; Hebrew character set (ISO-8859-8)
280
;; These Hebrew marks are punctuation.
(dolist (mark '(#x5be                   ; MAQAF
                #x5c0                   ; PASEQ
                #x5c3                   ; SOF PASUQ
                #x5f3                   ; GERESH
                #x5f4))                 ; GERSHAYIM
  (modify-syntax-entry mark "."))
286
287 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
288
;; A Unicode range and two charsets get the `i' (Indian) category.
(modify-category-entry '(#x901 . #x970) ?i)
(dolist (charset '(indian-is13194 indian-2-column))
  (map-charset-chars #'modify-category-entry charset ?i))
292
293
294 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
295
;; JISX0201 kana is `k' (katakana); JISX0201 roman is `r'.
(dolist (spec '((katakana-jisx0201 . ?k)
                (latin-jisx0201 . ?r)))
  (map-charset-chars #'modify-category-entry (car spec) (cdr spec)))

;; These charsets are `j' (Japanese) and allow line breaks (`|').
(dolist (charset '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212))
  (dolist (category '(?j ?\|))
    (map-charset-chars #'modify-category-entry charset category)))
303
304 ;; Unicode equivalents of JISX0201-kana
;; Unicode equivalents of JISX0201-kana: `k', `j' and `|', in that order.
(dolist (category '(?k ?j ?\|))
  (modify-category-entry '(#xff61 . #xff9f) category))

;; Katakana block
;; ?K is double width, ?k isn't specified
(dolist (category '(?K ?\|))
  (modify-category-entry '(#x30a0 . #x30ff) category))

;; Hiragana block
;; Both settings are commented out, so this form currently does nothing.
(let ((range '(#x3040 . #x309f)))
  ;; ?H is actually defined to be double width
  ;;(modify-category-entry range ?H)
  ;;(modify-category-entry range ?\|)
  )
322
323 ;; JISX0208
;; The code ranges #x2121..#x227E and #x2821..#x287E get symbol
;; syntax ("_").
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
;; The characters listed here get word syntax.  This must run after
;; the range settings above, which would otherwise override it.
;; The loop variable is passed to `modify-syntax-entry'; passing
;; (car chars) would set only the first character, over and over.
(let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
  (dolist (elt chars)
    (modify-syntax-entry elt "w")))
;; Bracket pairs: each opener is "(" plus its closer, each closer is
;; ")" plus its opener.
;; NOTE(review): the first three pairs read as ASCII brackets here;
;; given the section they may originally have been the fullwidth
;; forms -- confirm against a pristine copy of the file.
(modify-syntax-entry ?\( "()")
(modify-syntax-entry ?\[ "(]")
(modify-syntax-entry ?\{ "(}")
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\『 "(』")
(modify-syntax-entry ?\) ")(")
(modify-syntax-entry ?\] ")[")
(modify-syntax-entry ?\} "){")
(modify-syntax-entry ?\」 ")「")
(modify-syntax-entry ?\』 ")『")
339
;; Per-range categories; each entry is (CATEGORY FROM TO).
(dolist (row '((?A #x2321 #x237E)
               (?H #x2421 #x247E)
               (?K #x2521 #x257E)
               (?G #x2621 #x267E)
               (?Y #x2721 #x277E)
               (?C #x3021 #x7E7E)))
  (map-charset-chars #'modify-category-entry 'japanese-jisx0208
                     (car row) (nth 1 row) (nth 2 row)))
(modify-category-entry ?ー ?K)
;; These two marks are both `K' (katakana) and `H' (hiragana).
(dolist (mark '(?゛ ?゜))
  (modify-category-entry mark ?K)
  (modify-category-entry mark ?H))
;; These characters are added to category `C'.
(dolist (ch '(?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))
  (modify-category-entry ch ?C))
356
357 ;; JISX0212
358
;; The code range #x2121..#x237E of JISX0212 gets symbol syntax ("_").
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)

;; JISX0201-Kana

;; These three marks are punctuation.
(dolist (mark '(?。 ?、 ?・))
  (modify-syntax-entry mark "."))

;; Corner brackets: the opener is "(" plus its closer, and the closer
;; is ")" plus its opener.  (Giving the closer "(" syntax would make
;; both brackets open-parentheses.)
;; NOTE(review): given the section heading these literals may
;; originally have been the halfwidth forms -- confirm against a
;; pristine copy of the file.
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\」 ")「")
370
371 ;; Korean character set (KSC5601)
372
;; The whole charset is `h' (Korean).
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)

;; Two code spans get symbol syntax ("_").
(dolist (span '((#x2121 . #x227E)
                (#x2621 . #x297E)))
  (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_"
                     (car span) (cdr span)))
;; Per-range categories; each entry is (CATEGORY FROM TO).
(dolist (row '((?A #x2330 #x2339)
               (?A #x2341 #x235A)
               (?A #x2361 #x237A)
               (?G #x2521 #x257E)
               (?H #x2A21 #x2A7E)
               (?K #x2B21 #x2B7E)
               (?Y #x2C21 #x2C7E)))
  (map-charset-chars #'modify-category-entry 'korean-ksc5601
                     (car row) (nth 1 row) (nth 2 row)))
384
385 ;; Latin
386
;; Code points #x80..#x024F get the `l' (Latin) category.
(modify-category-entry '(#x80 . #x024F) ?l)
388
389 ;; Lao character set
390
;; Code points #xe80..#xeff and the `lao' charset are category `o'.
(modify-category-entry '(#xe80 . #xeff) ?o)
(map-charset-chars #'modify-category-entry 'lao ?o)

;; Each entry below is (CHARS SYNTAX CATEGORY).  Inside CHARS, "X-Y"
;; stands for the inclusive range X..Y and any other character stands
;; for itself.  Every character named gets CATEGORY, and also SYNTAX
;; unless SYNTAX is "w".
(let (from upto)
  (dolist (spec '(("ກ-ຮ" "w" ?0) ; consonant
                  ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base
                  ("ັິ-ືົໍ" "w" ?2) ; vowel upper
                  ("ຸູ" "w" ?3) ; vowel lower
                  ("່-໋" "w" ?4) ; tone mark
                  ("ຼຽ" "w" ?9) ; semivowel lower
                  ("໐-໙" "w" ?6) ; digit
                  ("ຯໆ" "_" ?5) ; symbol
                  ))
    (let ((members (car spec))
          (syntax (nth 1 spec))
          (category (nth 2 spec))
          (idx 0))
      (while (< idx (length members))
        (if (= (aref members idx) ?-)
            ;; A dash: FROM was left one past the character before the
            ;; dash by the previous pass, so only the end is read here.
            (setq idx (1+ idx)
                  upto (aref members idx))
          (setq from (aref members idx)
                upto from))
        (while (<= from upto)
          (unless (string-equal syntax "w")
            (modify-syntax-entry from syntax))
          (modify-category-entry from category)
          (setq from (1+ from)))
        (setq idx (1+ idx))))))
424
425 ;; Thai character set (TIS620)
426
;; Code points #xe00..#xe7f and the `thai-tis620' charset are category `t'.
(modify-category-entry '(#xe00 . #xe7f) ?t)
(map-charset-chars #'modify-category-entry 'thai-tis620 ?t)

;; Each entry below is (CHARS SYNTAX CATEGORY).  Inside CHARS, "X-Y"
;; stands for the inclusive range X..Y and any other character stands
;; for itself.  Every character named gets CATEGORY, and also SYNTAX
;; unless SYNTAX is "w".
(let (from upto)
  (dolist (spec '(;; chars syntax category
                  ("ก-รลว-ฮ" "w" ?0) ; consonant
                  ("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base
                  ("ัิ-ื็๎" "w" ?2) ; vowel upper
                  ("ุ-ฺ" "w" ?3) ; vowel lower
                  ("่-ํ" "w" ?4) ; tone mark
                  ("๐-๙" "w" ?6) ; digit
                  ("ฯๆ฿๏๚๛" "_" ?5) ; symbol
                  ))
    (let ((members (car spec))
          (syntax (nth 1 spec))
          (category (nth 2 spec))
          (idx 0))
      (while (< idx (length members))
        (if (= (aref members idx) ?-)
            ;; A dash: FROM was left one past the character before the
            ;; dash by the previous pass, so only the end is read here.
            (setq idx (1+ idx)
                  upto (aref members idx))
          (setq from (aref members idx)
                upto from))
        (while (<= from upto)
          (unless (string-equal syntax "w")
            (modify-syntax-entry from syntax))
          (modify-category-entry from category)
          (setq from (1+ from)))
        (setq idx (1+ idx))))))
460
461 ;; Tibetan character set
462
;; Code points #xf00..#xfff and both Tibetan charsets are category `q'.
(modify-category-entry '(#xf00 . #xfff) ?q)
(map-charset-chars #'modify-category-entry 'tibetan ?q)
(map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)

;; Each entry of `deflist' is (CHARS SYNTAX CATEGORY).  Inside CHARS,
;; "X-Y" stands for the inclusive range X..Y and any other character
;; stands for itself.  Every character named gets CATEGORY, and also
;; SYNTAX unless SYNTAX is "w".
;; NOTE(review): two entries below read as just "-" here; the range
;; end points appear to have been lost in transit.  As written, the
;; loop would index past the end of such a one-character string.
;; Confirm against a pristine copy of the file.
(let ((deflist '(;; chars syntax category
                 ("ཀ-ཀྵཪ" "w" ?0) ; consonant
                 ("ྐ-ྐྵྺྻྼ" "w" ?0) ;
                 ("-" "w" ?0) ;
                 ("-" "w" ?0) ;
                 ("ིེཻོཽྀ" "w" ?2) ; upper vowel
                 ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier
                 ("྄ཱུ༙༵༷" "w" ?3) ; lower vowel/modifier
                 ("༠-༩༪-༳" "w" ?6) ; digit
                 ("་།-༒༔ཿ" "." ?|) ; line-break char
                 ("་།༏༐༑༔ཿ" "." ?|) ;
                 ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition
                 ("་།༏༐༑༔ཿ" "." ?>) ;
                 ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition
                 ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
                 ))
      elm chars len syntax category to ch i)
  (while deflist
    (setq elm (car deflist))
    (setq chars (car elm)
          len (length chars)
          syntax (nth 1 elm)
          category (nth 2 elm)
          i 0)
    (while (< i len)
      (if (= (aref chars i) ?-)
          ;; A dash: CH was left one past the character before the dash
          ;; by the previous pass, so only the range end is read here.
          (setq i (1+ i)
                to (aref chars i))
        (setq ch (aref chars i)
              to ch))
      ;; Apply the settings to every character from CH through TO.
      (while (<= ch to)
        (unless (string-equal syntax "w")
          (modify-syntax-entry ch syntax))
        (modify-category-entry ch category)
        (setq ch (1+ ch)))
      (setq i (1+ i)))
    (setq deflist (cdr deflist))))
504
505 ;; Vietnamese character set
506
507 ;; To make a word with Latin characters
;; Both VISCII charsets are `l' (so words can mix with Latin
;; characters) and `v' (Vietnamese).
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (map-charset-chars #'modify-category-entry charset ?l)
  (map-charset-chars #'modify-category-entry charset ?v))

;; For each code 32..127, pair the upper- and lower-case characters in
;; the standard case table, and give category `v' to those that
;; `encode-char' maps to `ucs'.
(let ((case-table (standard-case-table))
      (code 32))
  (while (< code 128)
    (let* ((upper (decode-char 'vietnamese-viscii-upper code))
           (lower (decode-char 'vietnamese-viscii-lower code))
           (upper-ucs (encode-char upper 'ucs))
           (lower-ucs (encode-char lower 'ucs)))
      (set-case-syntax-pair upper lower case-table)
      (when upper-ucs
        (modify-category-entry upper-ucs ?v))
      (when lower-ucs
        (modify-category-entry lower-ucs ?v)))
    (setq code (1+ code))))
526
527 (let ((tbl (standard-case-table)) c)
528
529 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
530 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
531 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
532 ;; Thus we have to check language-environment to handle casing
533 ;; correctly. Currently only I<->i is available.
534
535 ;; Latin Extended-A, Latin Extended-B
;; Walk #x0100..#x0233 and pair adjacent code points.  TBL and C are
;; bound by the enclosing `let'.
(setq c #x0100)
(while (<= c #x0233)
  ;; Up to #x012e and in #x014a..#x0177 the upper-case letter sits on
  ;; the even code point and the lower-case one follows it.
  (and (or (<= c #x012e)
           (and (>= c #x014a) (<= c #x0177)))
       (zerop (% c 2))
       (set-case-syntax-pair c (1+ c) tbl))
  ;; For even C in #x013a..#x0148 the order is reversed: the
  ;; upper-case letter is the preceding odd code point.
  (and (>= c #x013a)
       (<= c #x0148)
       (zerop (% c 2))
       (set-case-syntax-pair (1- c) c tbl))
  (setq c (1+ c)))
;; U+0132 / U+0133, LATIN CAPITAL / SMALL LIGATURE IJ.  Written as code
;; points: a literal ligature is turned into the two letters "IJ" by
;; compatibility normalization, which is not a valid character constant.
(set-case-syntax-pair #x0132 #x0133 tbl)
(set-case-syntax-pair ?Ĵ ?ĵ tbl)
(set-case-syntax-pair ?Ķ ?ķ tbl)
;;; (set-case-syntax-pair ?Ÿ ?ÿ tbl) ; these two have different length!
(set-case-syntax-pair ?Ź ?ź tbl)
(set-case-syntax-pair ?Ż ?ż tbl)
(set-case-syntax-pair ?Ž ?ž tbl)
554
555 ;; Latin Extended-B
;; Explicit upper/lower pairs.  TBL is bound by the enclosing `let'.
(set-case-syntax-pair ?Ɓ ?ɓ tbl)
(set-case-syntax-pair ?Ƃ ?ƃ tbl)
(set-case-syntax-pair ?Ƅ ?ƅ tbl)
(set-case-syntax-pair ?Ɔ ?ɔ tbl)
(set-case-syntax-pair ?Ƈ ?ƈ tbl)
(set-case-syntax-pair ?Ɖ ?ɖ tbl)
(set-case-syntax-pair ?Ɗ ?ɗ tbl)
(set-case-syntax-pair ?Ƌ ?ƌ tbl)
(set-case-syntax-pair ?Ǝ ?ǝ tbl)
(set-case-syntax-pair ?Ə ?ə tbl)
(set-case-syntax-pair ?Ɛ ?ɛ tbl)
(set-case-syntax-pair ?Ƒ ?ƒ tbl)
(set-case-syntax-pair ?Ɠ ?ɠ tbl)
(set-case-syntax-pair ?Ɣ ?ɣ tbl)
(set-case-syntax-pair ?Ɩ ?ɩ tbl)
(set-case-syntax-pair ?Ɨ ?ɨ tbl)
(set-case-syntax-pair ?Ƙ ?ƙ tbl)
(set-case-syntax-pair ?Ɯ ?ɯ tbl)
(set-case-syntax-pair ?Ɲ ?ɲ tbl)
(set-case-syntax-pair ?Ɵ ?ɵ tbl)
(set-case-syntax-pair ?Ơ ?ơ tbl)
(set-case-syntax-pair ?Ƣ ?ƣ tbl)
(set-case-syntax-pair ?Ƥ ?ƥ tbl)
(set-case-syntax-pair ?Ʀ ?ʀ tbl)
(set-case-syntax-pair ?Ƨ ?ƨ tbl)
(set-case-syntax-pair ?Ʃ ?ʃ tbl)
(set-case-syntax-pair ?Ƭ ?ƭ tbl)
(set-case-syntax-pair ?Ʈ ?ʈ tbl)
(set-case-syntax-pair ?Ư ?ư tbl)
(set-case-syntax-pair ?Ʊ ?ʊ tbl)
(set-case-syntax-pair ?Ʋ ?ʋ tbl)
(set-case-syntax-pair ?Ƴ ?ƴ tbl)
(set-case-syntax-pair ?Ƶ ?ƶ tbl)
(set-case-syntax-pair ?Ʒ ?ʒ tbl)
(set-case-syntax-pair ?Ƹ ?ƹ tbl)
(set-case-syntax-pair ?Ƽ ?ƽ tbl)
;; The digraph letters are written as code points: a literal digraph
;; is turned into two separate letters by compatibility normalization,
;; which is not a valid character constant.  For each digraph both the
;; all-capitals form and the titlecase form are paired with the small
;; form.
(set-case-syntax-pair #x01C4 #x01C6 tbl) ; DZ WITH CARON: capital / small
(set-case-syntax-pair #x01C5 #x01C6 tbl) ; DZ WITH CARON: titlecase / small
(set-case-syntax-pair #x01C7 #x01C9 tbl) ; LJ: capital / small
(set-case-syntax-pair #x01C8 #x01C9 tbl) ; LJ: titlecase / small
(set-case-syntax-pair #x01CA #x01CC tbl) ; NJ: capital / small
(set-case-syntax-pair #x01CB #x01CC tbl) ; NJ: titlecase / small
(set-case-syntax-pair ?Ǎ ?ǎ tbl)
(set-case-syntax-pair ?Ǐ ?ǐ tbl)
(set-case-syntax-pair ?Ǒ ?ǒ tbl)
(set-case-syntax-pair ?Ǔ ?ǔ tbl)
(set-case-syntax-pair ?Ǖ ?ǖ tbl)
(set-case-syntax-pair ?Ǘ ?ǘ tbl)
(set-case-syntax-pair ?Ǚ ?ǚ tbl)
(set-case-syntax-pair ?Ǜ ?ǜ tbl)
(set-case-syntax-pair ?Ǟ ?ǟ tbl)
(set-case-syntax-pair ?Ǡ ?ǡ tbl)
(set-case-syntax-pair ?Ǣ ?ǣ tbl)
(set-case-syntax-pair ?Ǥ ?ǥ tbl)
(set-case-syntax-pair ?Ǧ ?ǧ tbl)
(set-case-syntax-pair ?Ǩ ?ǩ tbl)
(set-case-syntax-pair ?Ǫ ?ǫ tbl)
(set-case-syntax-pair ?Ǭ ?ǭ tbl)
(set-case-syntax-pair ?Ǯ ?ǯ tbl)
;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
(set-case-syntax-pair #x01F1 #x01F3 tbl) ; DZ: capital / small
(set-case-syntax-pair #x01F2 #x01F3 tbl) ; DZ: titlecase / small
(set-case-syntax-pair ?Ǵ ?ǵ tbl)
(set-case-syntax-pair ?Ƕ ?ƕ tbl)
(set-case-syntax-pair ?Ƿ ?ƿ tbl)
(set-case-syntax-pair ?Ǹ ?ǹ tbl)
(set-case-syntax-pair ?Ǻ ?ǻ tbl)
(set-case-syntax-pair ?Ǽ ?ǽ tbl)
(set-case-syntax-pair ?Ǿ ?ǿ tbl)
(set-case-syntax-pair ?Ȁ ?ȁ tbl)
(set-case-syntax-pair ?Ȃ ?ȃ tbl)
(set-case-syntax-pair ?Ȅ ?ȅ tbl)
(set-case-syntax-pair ?Ȇ ?ȇ tbl)
(set-case-syntax-pair ?Ȉ ?ȉ tbl)
(set-case-syntax-pair ?Ȋ ?ȋ tbl)
(set-case-syntax-pair ?Ȍ ?ȍ tbl)
(set-case-syntax-pair ?Ȏ ?ȏ tbl)
(set-case-syntax-pair ?Ȑ ?ȑ tbl)
(set-case-syntax-pair ?Ȓ ?ȓ tbl)
(set-case-syntax-pair ?Ȕ ?ȕ tbl)
(set-case-syntax-pair ?Ȗ ?ȗ tbl)
(set-case-syntax-pair ?Ș ?ș tbl)
(set-case-syntax-pair ?Ț ?ț tbl)
(set-case-syntax-pair ?Ȝ ?ȝ tbl)
(set-case-syntax-pair ?Ȟ ?ȟ tbl)
(set-case-syntax-pair ?Ȣ ?ȣ tbl)
(set-case-syntax-pair ?Ȥ ?ȥ tbl)
(set-case-syntax-pair ?Ȧ ?ȧ tbl)
(set-case-syntax-pair ?Ȩ ?ȩ tbl)
(set-case-syntax-pair ?Ȫ ?ȫ tbl)
(set-case-syntax-pair ?Ȭ ?ȭ tbl)
(set-case-syntax-pair ?Ȯ ?ȯ tbl)
(set-case-syntax-pair ?Ȱ ?ȱ tbl)
(set-case-syntax-pair ?Ȳ ?ȳ tbl)
650
651 ;; Latin Extended Additional
;; #x1e00..#x1ef9 is category `l'.  Within it, every even code point
;; outside the gap #x1e95..#x1e9f is paired with its successor.
;; TBL and C are bound by the enclosing `let'.
(modify-category-entry '(#x1e00 . #x1ef9) ?l)
(setq c #x1e00)
(while (<= c #x1ef9)
  (when (and (zerop (% c 2))
             (or (<= c #x1e94) (>= c #x1ea0)))
    (set-case-syntax-pair c (1+ c) tbl))
  (setq c (1+ c)))
659
660 ;; Greek
;; #x0370..#x03ff is category `g'.  TBL and C are bound by the
;; enclosing `let'.
(modify-category-entry '(#x0370 . #x03ff) ?g)
(setq c #x0370)
(while (<= c #x03ff)
  (cond
   ;; #x0391..#x03a1 and #x03a3..#x03ab: the lower case is 32 above.
   ((or (and (>= c #x0391) (<= c #x03a1))
        (and (>= c #x03a3) (<= c #x03ab)))
    (set-case-syntax-pair c (+ c 32) tbl))
   ;; Even code points in #x03da..#x03ee pair with their successor.
   ((and (>= c #x03da)
         (<= c #x03ee)
         (zerop (% c 2)))
    (set-case-syntax-pair c (1+ c) tbl)))
  (setq c (1+ c)))
;; Accented capitals, which do not follow the regular offset.
(dolist (pair '((?Ά . ?ά) (?Έ . ?έ) (?Ή . ?ή) (?Ί . ?ί)
                (?Ό . ?ό) (?Ύ . ?ύ) (?Ώ . ?ώ)))
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
679
680 ;; Armenian
;; Each code point in #x531..#x556 is paired with the one #x30 above.
;; TBL and C are bound by the enclosing `let'.
(setq c #x531)
(while (not (> c #x556))
  (let ((lower (+ c #x30)))
    (set-case-syntax-pair c lower tbl))
  (setq c (1+ c)))
685
686 ;; Greek Extended
;; #x1f00..#x1fff is category `g'.  TBL and C are bound by the
;; enclosing `let'.
(modify-category-entry '(#x1f00 . #x1fff) ?g)
;; Up to #x1fa7 the block is laid out in rows of sixteen: the low half
;; of a row (low nibble 0..7) is lower case and the matching capital
;; is 8 above.
(setq c #x1f00)
(while (<= c #x1fff)
  (and (<= (logand c #x000f) 7)
       (<= c #x1fa7)
       (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
       ;; Skip the row #x1f70..#x1f7f.  The mask keeps the second
       ;; nibble in place, so the value to compare with is #x70;
       ;; comparing with 7 could never match and skipped nothing.
       (/= (logand c #x00f0) #x70)
       (set-case-syntax-pair (+ c 8) c tbl))
  (setq c (1+ c)))
;; Explicit (UPPER . LOWER) pairs for the rest of the block.  They are
;; written as code points because several of these letters are
;; canonically equivalent to letters of the basic Greek block, so a
;; literal character may be silently replaced by normalization.
(dolist (pair '(;; alpha
                (#x1fb8 . #x1fb0) (#x1fb9 . #x1fb1) (#x1fba . #x1f70)
                (#x1fbb . #x1f71) (#x1fbc . #x1fb3)
                ;; epsilon
                (#x1fc8 . #x1f72) (#x1fc9 . #x1f73)
                ;; eta
                (#x1fca . #x1f74) (#x1fcb . #x1f75) (#x1fcc . #x1fc3)
                ;; iota
                (#x1fd8 . #x1fd0) (#x1fd9 . #x1fd1) (#x1fda . #x1f76)
                (#x1fdb . #x1f77)
                ;; upsilon
                (#x1fe8 . #x1fe0) (#x1fe9 . #x1fe1) (#x1fea . #x1f7a)
                (#x1feb . #x1f7b)
                ;; rho
                (#x1fec . #x1fe5)
                ;; omicron
                (#x1ff8 . #x1f78) (#x1ff9 . #x1f79)
                ;; omega
                (#x1ffa . #x1f7c) (#x1ffb . #x1f7d) (#x1ffc . #x1ff3)))
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
720
721 ;; cyrillic
;; #x0400..#x04FF is category `y'.  TBL and C are bound by the
;; enclosing `let'.
(modify-category-entry '(#x0400 . #x04FF) ?y)
(setq c #x0400)
(while (<= c #x04ff)
  (cond
   ;; #x0400..#x040f: the lower case is 80 above.
   ((and (>= c #x0400) (<= c #x040f))
    (set-case-syntax-pair c (+ c 80) tbl))
   ;; #x0410..#x042f: the lower case is 32 above.
   ((and (>= c #x0410) (<= c #x042f))
    (set-case-syntax-pair c (+ c 32) tbl))
   ;; Even code points in these three spans pair with their successor.
   ((and (zerop (% c 2))
         (or (and (>= c #x0460) (<= c #x0480))
             (and (>= c #x048c) (<= c #x04be))
             (and (>= c #x04d0) (<= c #x04f4))))
    (set-case-syntax-pair c (1+ c) tbl)))
  (setq c (1+ c)))
;; Pairs that do not fit the patterns above.
(dolist (pair '((?Ӂ . ?ӂ) (?Ӄ . ?ӄ) (?Ӈ . ?ӈ) (?Ӌ . ?ӌ) (?Ӹ . ?ӹ)))
  (set-case-syntax-pair (car pair) (cdr pair) tbl))
742
743 ;; general punctuation
;; General punctuation.  Each entry is (FROM TO SYNTAX): every code
;; point in FROM..TO gets SYNTAX in TBL.  TBL and C are bound by the
;; enclosing `let'.
(dolist (spec '((#x2000 #x200b " ")
                (#x2010 #x2027 "_")))
  (setq c (car spec))
  (while (<= c (nth 1 spec))
    (set-case-syntax c (nth 2 spec) tbl)
    (setq c (1+ c))))

;; Roman numerals, Circled Latin, Fullwidth Latin.
;; Each entry is (FROM TO OFFSET LATIN-P): every code point in FROM..TO
;; is paired with the one OFFSET above it; when LATIN-P is non-nil both
;; also get the `l' category.
(dolist (spec '((#x2160 #x216f #x10 nil)   ; Roman numerals
                (#x24b6 #x24cf 26 t)       ; Circled Latin
                (#xff21 #xff3a #x20 t)))   ; Fullwidth Latin
  (let ((last (nth 1 spec))
        (offset (nth 2 spec))
        (latin-p (nth 3 spec)))
    (setq c (car spec))
    (while (<= c last)
      (set-case-syntax-pair c (+ c offset) tbl)
      (when latin-p
        (modify-category-entry c ?l)
        (modify-category-entry (+ c offset) ?l))
      (setq c (1+ c)))))

;; Combining diacritics (#x300..#x362) and combining marks
;; (#x20d0..#x20e3) are category `^'.
(dolist (range '((#x300 . #x362)
                 (#x20d0 . #x20e3)))
  (modify-category-entry range ?^))
779
780 ;; Fixme: syntax for symbols &c
781 )
782 \f
;; For each character set, record the most suitable coding system for
;; encoding it in the charset's `preferred-coding-system' property.
785
786 ;; Fixme: should this be junked?
;; Each entry is (CHARSET . CODING-SYSTEM); the coding system is stored
;; as CHARSET's `preferred-coding-system' property.
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . cn-gb-2312)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
832
833 \f
834 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
835 ;; SPACE and NEWLINE are already set. Also put `nospace-between-words'
836 ;; property on the charsets.
;; Mark these charsets with the `nospace-between-words' property.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  ;;(aset auto-fill-chars (make-char charset) t)
  (put-charset-property charset 'nospace-between-words t))
844
845 \f
;; Codes 32..126 and everything from 160 up to (max-char) are printable.
(set-char-table-range printable-chars '(32 . 126) t)
(set-char-table-range printable-chars (cons 160 (max-char)) t)

;; CJK double width characters.
(dolist (span '((#x1100 . #x11FF)
                (#x2E80 . #x9FAF)
                (#xAC00 . #xD7AF)
                (#xF900 . #xFAFF)
                (#xFE30 . #xFE4F)
                (#xFF00 . #xFF5F)
                (#xFFE0 . #xFFEF)))
  (set-char-table-range char-width-table
                        (cons (car span) (cdr span))
                        2))
;; Every character of the charsets below is two columns wide.  The
;; first four are the Japanese sets; the rest are the other
;; double-width sets.
(let ((set-double-width
       (lambda (range ignore)
         (set-char-table-range char-width-table range 2))))
  (dolist (charset '(japanese-jisx0208
                     japanese-jisx0212
                     japanese-jisx0213-1
                     japanese-jisx0213-2
                     ;; Other double width
                     ethiopic
                     tibetan
                     indian-2-column
                     arabic-2-column
                     korean-ksc5601))
    (map-charset-chars set-double-width charset)))
890
;; Optimize the four tables populated above, in this order.
(dolist (table (list (standard-case-table)
                     char-width-table
                     (standard-category-table)
                     (standard-syntax-table)))
  (optimize-char-table table))
895
;; Each row is (FROM TO SCRIPT): code points FROM..TO are assigned
;; SCRIPT in `char-script-table'.  The distinct script names, in order
;; of first appearance, are stored in the table's extra slot 0.
(let (seen-scripts)
  (dolist (row '((#x0000 #x007F latin)
                 (#x00A0 #x036F latin)
                 (#x0370 #x03E1 greek)
                 (#x03E2 #x03EF coptic)
                 (#x03F0 #x03F3 greek)
                 (#x0400 #x04FF cyrillic)
                 (#x0530 #x058F armenian)
                 (#x0590 #x05FF hebrew)
                 (#x0600 #x06FF arabic)
                 (#x0700 #x074F syriac)
                 (#x0780 #x07BF thaana)
                 (#x0900 #x097F devanagari)
                 (#x0980 #x09FF bengali)
                 (#x0A00 #x0A7F gurmukhi)
                 (#x0A80 #x0AFF gujarati)
                 (#x0B00 #x0B7F oriya)
                 (#x0B80 #x0BFF tamil)
                 (#x0C00 #x0C7F telugu)
                 (#x0C80 #x0CFF kannada)
                 (#x0D00 #x0D7F malayalam)
                 (#x0D80 #x0DFF sinhala)
                 (#x0E00 #x0E5F thai)
                 (#x0E80 #x0EDF lao)
                 (#x0F00 #x0FFF tibetan)
                 (#x1000 #x105F myanmar)
                 (#x10A0 #x10FF georgian)
                 (#x1100 #x11FF hangul)
                 (#x1200 #x137F ethiopic)
                 (#x13A0 #x13FF cherokee)
                 (#x1400 #x167F canadian-aboriginal)
                 (#x1680 #x169F ogham)
                 (#x16A0 #x16FF runic)
                 (#x1780 #x17FF khmer)
                 (#x1800 #x18AF mongolian)
                 (#x1E00 #x1EFF latin)
                 (#x1F00 #x1FFF greek)
                 (#x20A0 #x20AF currency)
                 (#x2800 #x28FF braille)
                 (#x2E80 #x2FDF han)
                 (#x2FF0 #x2FFF ideographic-description)
                 (#x3000 #x303F cjk-misc)
                 (#x3040 #x30FF kana)
                 (#x3100 #x312F bopomofo)
                 (#x3130 #x318F hangul)
                 (#x3190 #x319F kanbun)
                 (#x31A0 #x31BF bopomofo)
                 (#x3400 #x9FAF han)
                 (#xA000 #xA4CF yi)
                 (#xAC00 #xD7AF hangul)
                 (#xF900 #xFA5F han)
                 (#xFB1D #xFB4F hebrew)
                 (#xFB50 #xFDFF arabic)
                 (#xFE70 #xFEFC arabic)
                 (#xFF00 #xFF5F cjk-misc)
                 (#xFF61 #xFF9F kana)
                 (#xFFE0 #xFFE6 cjk-misc)
                 (#x20000 #x2AFFF han)
                 (#x2F800 #x2FFFF han)))
    (let ((first (car row))
          (last (nth 1 row))
          (script (nth 2 row)))
      (set-char-table-range char-script-table (cons first last) script)
      (unless (memq script seen-scripts)
        (setq seen-scripts (cons script seen-scripts)))))
  (set-char-table-extra-slot char-script-table 0 (nreverse seen-scripts)))
962
963 \f
964 ;;; Setting word boundary.
965
(defun next-word-boundary-han (pos limit)
  "Return the word boundary found from POS in the direction of LIMIT.
If POS is not greater than LIMIT, look forward: skip a run of
category `C' characters starting at POS, then a run of category `H'
characters, and return the position reached.  If neither run is
present at POS, return POS.
Otherwise look backward: step back from POS, but not past LIMIT,
while the preceding character's script in `char-script-table' is
`han', and return the position reached.
Point is not moved."
  (if (<= pos limit)
      (save-excursion
        (goto-char pos)
        ;; Only use the match data when `looking-at' succeeded;
        ;; otherwise `match-end' would report some earlier, unrelated
        ;; match.
        (if (looking-at "\\cC+")
            (goto-char (match-end 0)))
        (if (looking-at "\\cH+")
            (goto-char (match-end 0)))
        (point))
    (while (and (> pos limit)
                (eq (aref char-script-table (char-after (1- pos))) 'han))
      (setq pos (1- pos)))
    pos))
979
(defun next-word-boundary-kana (pos limit)
  ;; Return the word boundary found from POS in the direction of LIMIT.
  ;; If POS is not greater than LIMIT, look forward: skip a run of
  ;; category `K' characters at POS, if any, then a run of category `H'
  ;; characters, if any, and return the position reached.  Otherwise
  ;; look backward from POS, never moving past LIMIT, as explained in
  ;; the comments below.
  (if (<= pos limit)
      (save-excursion
	(goto-char pos)
	(if (looking-at "\\cK+")
	    (goto-char (match-end 0)))
	(if (looking-at "\\cH+")
	    (goto-char (match-end 0)))
	(point))
    ;; Backward search.  CATEGORY-SET starts out as the category set of
    ;; the character at POS.
    ;; NOTE(review): this presumes there is a character at POS, i.e.
    ;; POS is not the end of the buffer -- confirm against the callers.
    (let ((category-set (char-category-set (char-after pos)))
	  category)
      (if (aref category-set ?K)
	  ;; The character at POS is `K': step back over the preceding
	  ;; run of `K' characters.
	  (while (and (> pos limit)
		      (aref (char-category-set (char-after (1- pos))) ?K))
	    (setq pos (1- pos)))
	;; Otherwise step back over the preceding run of `H' characters.
	;; CATEGORY-SET is updated on every test, so afterwards it holds
	;; the set of the last character examined.
	(while (and (> pos limit)
		    (aref (setq category-set
				(char-category-set (char-after (1- pos)))) ?H))
	  (setq pos (1- pos)))
	;; If that character is `C', `K' or `A' (tested in this order),
	;; step back over it and then over the run of the same category
	;; preceding it.
	(setq category (cond ((aref category-set ?C) ?C)
			     ((aref category-set ?K) ?K)
			     ((aref category-set ?A) ?A)))
	(when category
	  (setq pos (1- pos))
	  (while (and (> pos limit)
		      (aref (char-category-set (char-after (1- pos)))
			    category))
	    (setq pos (1- pos)))))
      pos)))
1009
;; For every entry of `char-script-table' whose script is `han' or
;; `kana', install the matching boundary function in
;; `next-word-boundary-function-table'.  Other scripts are left alone.
(map-char-table
 #'(lambda (char script)
     (let ((boundary-function
            (cond ((eq script 'han) #'next-word-boundary-han)
                  ((eq script 'kana) #'next-word-boundary-kana))))
       (if boundary-function
           (set-char-table-range next-word-boundary-function-table
                                 char boundary-function))))
 char-script-table)
1019
;; Category pairs for word combining and word separating.  Each element
;; is (CATEGORY . CATEGORY); the comments name the two categories in
;; the same order.
(setq word-combining-categories
      '((?l . ?l))                      ; Latin - Latin

      word-separating-categories        ; (2-byte character sets)
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
1034
1035 ;;; Local Variables:
1036 ;;; coding: utf-8-emacs
1037 ;;; End:
1038
1039 ;;; characters.el ends here