lisp/international/characters.el

   1 ;;; characters.el --- set syntax and category for multibyte characters
   2
   3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
   4 ;; Licensed to the Free Software Foundation.
   5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   6 ;; Copyright (C) 2001, 2002
   7 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   8 ;;   Registration Number H13PRO009
   9
  10 ;; Keywords: multibyte character, character set, syntax, category
  11
  12 ;; This file is part of GNU Emacs.
  13
  14 ;; GNU Emacs is free software; you can redistribute it and/or modify
  15 ;; it under the terms of the GNU General Public License as published by
  16 ;; the Free Software Foundation; either version 2, or (at your option)
  17 ;; any later version.
  18
  19 ;; GNU Emacs is distributed in the hope that it will be useful,
  20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 ;; GNU General Public License for more details.
  23
  24 ;; You should have received a copy of the GNU General Public License
  25 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  27 ;; Boston, MA 02111-1307, USA.
  28
  29 ;;; Commentary:
  30
  31 ;; This file contains multibyte characters.  Save this file always in
  32 ;; the coding system `iso-2022-7bit'.
  33
  34 ;; This file does not define the syntax for Latin-N character sets;
  35 ;; those are defined by the files latin-N.el.
  36
  37 ;;; Code:
  38
  39 ;;; Predefined categories.
  40
  41 ;; For each character set.
  42
  43 (dolist (entry '((?a . "ASCII")     ; (MNEMONIC . DOCSTRING), one per script
  44                  (?l . "Latin")
  45                  (?t . "Thai")
  46                  (?g . "Greek")
  47                  (?b . "Arabic")
  48                  (?w . "Hebrew")
  49                  (?y . "Cyrillic")
  50                  (?k . "Japanese katakana")
  51                  (?r . "Japanese roman")
  52                  (?c . "Chinese")
  53                  (?j . "Japanese")
  54                  (?h . "Korean")
  55                  (?e . "Ethiopic (Ge'ez)")
  56                  (?v . "Vietnamese")
  57                  (?i . "Indian")
  58                  (?o . "Lao")
  59                  (?q . "Tibetan")))
  60   (define-category (car entry) (cdr entry)))
  61 ;; For each group (row) of 2-byte character sets.
  62
  63 (dolist (entry '((?A . "Alpha-numeric characters of 2-byte character sets")
  64                  (?C . "Chinese (Han) characters of 2-byte character sets")
  65                  (?G . "Greek characters of 2-byte character sets")
  66                  (?H . "Japanese Hiragana characters of 2-byte character sets")
  67                  (?K . "Japanese Katakana characters of 2-byte character sets")
  68                  (?N . "Korean Hangul characters of 2-byte character sets")
  69                  (?Y . "Cyrillic characters of 2-byte character sets")
  70                  (?I . "Indian Glyphs")))
  71   (define-category (car entry) (cdr entry))) ; ENTRY is (MNEMONIC . DOCSTRING)
  72 ;; For phonetic classifications.
  73
  74 (dolist (entry '((?0 . "consonant")
  75                  (?1 . "base (independent) vowel")
  76                  (?2 . "upper diacritical mark (including upper vowel)")
  77                  (?3 . "lower diacritical mark (including lower vowel)")
  78                  (?4 . "tone mark")
  79                  (?5 . "symbol")
  80                  (?6 . "digit")
  81                  (?7 . "vowel-modifying diacritical mark")
  82                  (?8 . "vowel-signs")
  83                  (?9 . "semivowel lower")))
  84   (define-category (car entry) (cdr entry))) ; digits are the mnemonics here
  85 ;; For filling.
  86 (define-category ?| "While filling, we can break a line at this character.")
  87
  88 ;; For indentation calculation.  The category mnemonic is the SPACE
     ;; character.  It is spelled `?\ ' and followed by a comment so that
     ;; stripping trailing whitespace cannot turn the literal into `?'
     ;; followed by a newline, which is not a valid category mnemonic.
  89 (define-category ?\  ; SPACE
  90   "This character counts as a space for indentation purposes.")
  91
  92 ;; Keep the following for `kinsoku' processing.  See comments in
  93 ;; kinsoku.el.
     ;; In this file `>' and `<' are assigned to the Tibetan punctuation
     ;; lists further down.
  94 (define-category ?> "A character which can't be placed at beginning of line.")
  95 (define-category ?< "A character which can't be placed at end of line.")
  96
  97 ;; Combining
     ;; Assigned below to the ranges #x300-#x362 and #x20d0-#x20e3.
  98 (define-category ?^ "Combining diacritic or mark")
  99 \f
 100 ;;; Setting syntax and category.
 101
 102 ;; ASCII
 103
 104 ;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
 105 (modify-category-entry '(32 . 127) ?a)
 106 (modify-category-entry '(32 . 127) ?l)
 107
 108 ;; Arabic character set
 109
 110 (let ((charsets '(arabic-iso8859-6
 111                   arabic-digit
 112                   arabic-1-column
 113                   arabic-2-column)))
 114   (while charsets
 115     (map-charset-chars #'modify-category-entry (car charsets) ?b)
 116     (setq charsets (cdr charsets))))
 117 (modify-category-entry '(#x600 . #x6ff) ?b)
 118 (modify-category-entry '(#xfb50 . #xfdff) ?b)
 119 (modify-category-entry '(#xfe70 . #xfefe) ?b)
 120
 121 ;; Chinese characters (Unicode)
 122 (modify-category-entry '(#x3400 . #x9FAF) ?C)
 123 (modify-category-entry '(#x3400 . #x9FAF) ?c)
 124 (modify-category-entry '(#x3400 . #x9FAF) ?|)
 125 (modify-category-entry '(#xF900 . #xFAFF) ?C)
 126 (modify-category-entry '(#xF900 . #xFAFF) ?c)
 127 (modify-category-entry '(#xF900 . #xFAFF) ?|)
 128
  129 ;; Chinese character set (GB2312)
  130
      ;; Three code ranges of the charset get symbol syntax ("_").
  131 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
  132 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
  133 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)
      ;; Bracket pairs: each opening bracket gets open-parenthesis syntax
      ;; naming its closing partner ...
  134 (modify-syntax-entry ?\〔 "(〕")
  135 (modify-syntax-entry ?\〈 "(〉")
  136 (modify-syntax-entry ?\《 "(》")
  137 (modify-syntax-entry ?\「 "(」")
  138 (modify-syntax-entry ?\『 "(』")
  139 (modify-syntax-entry ?\〖 "(〗")
  140 (modify-syntax-entry ?\【 "(】")
      ;; ... and each closing bracket gets close-parenthesis syntax naming
      ;; its opening partner.
  141 (modify-syntax-entry ?\〕 ")〔")
  142 (modify-syntax-entry ?\〉 ")〈")
  143 (modify-syntax-entry ?\》 ")《")
  144 (modify-syntax-entry ?\」 ")「")
  145 (modify-syntax-entry ?\』 ")『")
  146 (modify-syntax-entry ?\〗 ")〖")
  147 (modify-syntax-entry ?\】 ")【")
  148
      ;; The whole charset is Chinese (`c') and breakable when filling (`|');
      ;; the code ranges below additionally get the 2-byte group categories.
  149 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
  150 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?|)
  151 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
  152 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
  153 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
  154 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
  155 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
  156 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
  157 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
  158 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
 159
 160 ;; Chinese character set (BIG5)
 161
 162 (map-charset-chars #'modify-category-entry 'big5 ?c)
 163 (map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA25F)
 164 (map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E)
 165 (map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DF)
 166 (map-charset-chars #'modify-category-entry 'big5 ?|)
 167
 168
 169 ;; Chinese character set (CNS11643)
 170
 171 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
 172              chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
 173              chinese-cns11643-7))
 174   (map-charset-chars #'modify-category-entry c ?c)
 175   (if (eq c 'chinese-cns11643-1)
 176       (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E)
 177     (map-charset-chars #'modify-category-entry c ?C))
 178   (map-charset-chars #'modify-category-entry c ?|))
 179
  180 ;; Cyrillic character set (ISO-8859-5)
  181
  182 (modify-syntax-entry ?№ ".")            ; "." is punctuation syntax
 183
  184 ;; Ethiopic character set
  185
  186 (modify-category-entry '(#x1200 . #x137b) ?e)
      ;; The listed characters get punctuation syntax.
      ;; NOTE(review): the last six character literals are unreadable in this
      ;; copy of the file (they show as replacement characters) -- restore
      ;; them from a correctly encoded copy before loading.
  187 (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ?���� ?���� ?���� ?���� ?���� ?����)))
  188   (while chars
  189     (modify-syntax-entry (car chars) ".")
  190     (setq chars (cdr chars))))
      ;; Every character of the `ethiopic' charset gets category `e'.
  191 (map-charset-chars #'modify-category-entry 'ethiopic ?e)
 192
 193 ;; Hebrew character set (ISO-8859-8)
 194
 195 (modify-syntax-entry #x5be ".") ; MAQAF
 196 (modify-syntax-entry #x5c0 ".") ; PASEQ
 197 (modify-syntax-entry #x5c3 ".") ; SOF PASUQ
 198 (modify-syntax-entry #x5f3 ".") ; GERESH
 199 (modify-syntax-entry #x5f4 ".") ; GERSHAYIM
 200
 201 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
 202
 203 (modify-category-entry '(#x901 . #x970) ?i)
 204 (map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
 205 (map-charset-chars #'modify-category-entry 'indian-2-column ?i)
 206
 207
 208 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
 209
 210 (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
 211
 212 (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
 213
 214 (dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212))
 215   (map-charset-chars #'modify-category-entry l ?j)
 216   (map-charset-chars #'modify-category-entry l ?\|))
 217
 218 ;; Unicode equivalents of JISX0201-kana
 219 (let ((range '(#xff61 . #xff9f)))
 220   (modify-category-entry range  ?k)
 221   (modify-category-entry range ?j)
 222   (modify-category-entry range ?\|))
 223
 224 ;; Katakana block
 225 (let ((range '(#x30a0 . #x30ff)))
 226   ;; ?K is double width, ?k isn't specified
 227   (modify-category-entry range ?K)
 228   (modify-category-entry range ?\|))
 229
 230 ;; Hiragana block
 231 (let ((range '(#x3040 . #x309f)))
 232   ;; ?H is actually defined to be double width
 233   ;;(modify-category-entry range ?H)
 234   ;;(modify-category-entry range ?\|)
 235   )
 236
 237 ;; JISX0208
 238 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
 239 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
 240 (let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
 241   (dolist (elt chars)
 242     (modify-syntax-entry (car chars) "w")))
 243 (modify-syntax-entry ?\（ "(）")
 244 (modify-syntax-entry ?\［ "(］")
 245 (modify-syntax-entry ?\｛ "(｝")
 246 (modify-syntax-entry ?\「 "(」")
 247 (modify-syntax-entry ?\『 "(』")
 248 (modify-syntax-entry ?\） ")（")
 249 (modify-syntax-entry ?\］ ")［")
 250 (modify-syntax-entry ?\｝ ")｛")
 251 (modify-syntax-entry ?\」 ")「")
 252 (modify-syntax-entry ?\』 ")『")
 253
 254 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E)
 255 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E)
 256 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E)
 257 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E)
 258 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E)
 259 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E)
 260 (modify-category-entry ?ー ?K)
 261 (let ((chars '(?゛ ?゜)))
 262   (while chars
 263     (modify-category-entry (car chars) ?K)
 264     (modify-category-entry (car chars) ?H)
 265     (setq chars (cdr chars))))
 266 (let ((chars '(?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
 267   (while chars
 268     (modify-category-entry (car chars) ?C)
 269     (setq chars (cdr chars))))
 270
 271 ;; JISX0212
 272
 273 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)
 274
 275 ;; JISX0201-Kana
 276
 277 (let ((chars '(?｡ ?､ ?･)))
 278   (while chars
 279     (modify-syntax-entry (car chars) ".")
 280     (setq chars (cdr chars))))
 281
 282 (modify-syntax-entry ?\｢ "(｣")
 283 (modify-syntax-entry ?\｣ "(｢")
 284
 285 ;; Korean character set (KSC5601)
 286
 287 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)
 288
 289 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E)
 290 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E)
 291 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E)
 292 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E)
 293 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339)
 294 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A)
 295 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A)
 296 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E)
 297 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E)
 298 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E)
 299 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
 300
  301 ;; Latin
  302
  303 (modify-category-entry '(#x80 . #x024F) ?l) ; codes #x80-#x24F get category `l'
 304
  305 ;; Lao character set
  306
  307 (modify-category-entry '(#xe80 . #xeff) ?o)
  308 (map-charset-chars #'modify-category-entry 'lao ?o)
  309
      ;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY).  CHARS is a string of
      ;; single characters and ranges written "A-B".
  310 (let ((deflist  '(("ກ-ຮ"    "w"     ?0) ; consonant
  311                   ("ະາຳຽເ-ໄ"        "w"     ?1) ; vowel base
  312                   ("ັິ-ືົໍ"   "w"     ?2) ; vowel upper
  313                   ("ຸູ"     "w"     ?3) ; vowel lower
  314                   ("່-໋"    "w"     ?4) ; tone mark
  315                   ("ຼຽ"     "w"     ?9) ; semivowel lower
  316                   ("໐-໙"    "w"     ?6) ; digit
  317                   ("ຯໆ"     "_"     ?5) ; symbol
  318                   ))
  319       elm chars len syntax category to ch i)
  320   (while deflist
  321     (setq elm (car deflist))
  322     (setq chars (car elm)
  323           len (length chars)
  324           syntax (nth 1 elm)
  325           category (nth 2 elm)
  326           i 0)
  327     (while (< i len)
            ;; On "-", the next character is the end of a range.  CH is left
            ;; alone: the inner loop already advanced it one past the
            ;; previous character, so the range resumes from there.
  328       (if (= (aref chars i) ?-)
  329           (setq i (1+ i)
  330                 to (aref chars i))
  331         (setq ch (aref chars i)
  332               to ch))
            ;; Apply the entry to every character from CH through TO.  The
            ;; syntax is only written when it is not "w".
  333       (while (<= ch to)
  334         (unless (string-equal syntax "w")
  335           (modify-syntax-entry ch syntax))
  336         (modify-category-entry ch category)
  337         (setq ch (1+ ch)))
  338       (setq i (1+ i)))
  339     (setq deflist (cdr deflist))))
 340
  341 ;; Thai character set (TIS620)
  342
  343 (modify-category-entry '(#xe00 . #xe7f) ?t)
  344 (map-charset-chars #'modify-category-entry 'thai-tis620 ?t)
  345
      ;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY).  CHARS is a string of
      ;; single characters and ranges written "A-B".
  346 (let ((deflist  '(;; chars      syntax  category
  347                   ("ก-รลว-ฮ"  "w"     ?0) ; consonant
  348                   ("ฤฦะาำเ-ๅ"     "w"     ?1) ; vowel base
  349                   ("ัิ-ื็๎"   "w"     ?2) ; vowel upper
  350                   ("ุ-ฺ"    "w"     ?3) ; vowel lower
  351                   ("่-ํ"    "w"     ?4) ; tone mark
  352                   ("๐-๙"    "w"     ?6) ; digit
  353                   ("ฯๆ฿๏๚๛" "_"     ?5) ; symbol
  354                   ))
  355       elm chars len syntax category to ch i)
  356   (while deflist
  357     (setq elm (car deflist))
  358     (setq chars (car elm)
  359           len (length chars)
  360           syntax (nth 1 elm)
  361           category (nth 2 elm)
  362           i 0)
  363     (while (< i len)
            ;; On "-", the next character is the end of a range.  CH is left
            ;; alone: the inner loop already advanced it one past the
            ;; previous character, so the range resumes from there.
  364       (if (= (aref chars i) ?-)
  365           (setq i (1+ i)
  366                 to (aref chars i))
  367         (setq ch (aref chars i)
  368               to ch))
            ;; Apply the entry to every character from CH through TO.  The
            ;; syntax is only written when it is not "w".
  369       (while (<= ch to)
  370         (unless (string-equal syntax "w")
  371           (modify-syntax-entry ch syntax))
  372         (modify-category-entry ch category)
  373         (setq ch (1+ ch)))
  374       (setq i (1+ i)))
  375     (setq deflist (cdr deflist))))
 376
  377 ;; Tibetan character set
  378
  379 (modify-category-entry '(#xf00 . #xfff) ?q)
  380 (map-charset-chars #'modify-category-entry 'tibetan ?q)
  381 (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
  382
      ;; Each DEFLIST entry is (CHARS SYNTAX CATEGORY).  CHARS is a string of
      ;; single characters and ranges written "A-B".
      ;; NOTE(review): several strings below contain characters that are
      ;; unreadable in this copy of the file (they show as replacement
      ;; characters) -- restore them from a correctly encoded copy.
  383 (let ((deflist  '(;; chars             syntax category
  384                   ("ཀ-ཀྵཪ"         "w"     ?0) ; consonant
  385                   ("ྐ-ྐྵྺྻྼ��������"       "w"     ?0) ;
  386                   ("����-����"              "w"     ?0) ;
  387                   ("����-����"              "w"     ?0) ;
  388                   ("ིེཻོཽྀ"       "w"       ?2) ; upper vowel
  389                   ("ཾྂྃ྆྇ྈྉྊྋ" "w"    ?2) ; upper modifier
  390                   ("༙����྄ཱུ༵༷"       "w"   ?3) ; lower vowel/modifier
  391                   ("༠-༩༪-༳"             "w"     ?6) ; digit
  392                   ("་།-༒༔ཿ"        "."     ?|) ; line-break char
  393                   ("་།༏༐༑༔ཿ"            "."     ?|) ;
  394                   ("༈་།-༒༔ཿ༽༴"  "."     ?>) ; prohibition
  395                   ("་།༏༐༑༔ཿ"            "."     ?>) ;
  396                   ("ༀ-༊༼࿁࿂྅"      "."     ?<) ; prohibition
  397                   ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
  398                   ))
  399       elm chars len syntax category to ch i)
  400   (while deflist
  401     (setq elm (car deflist))
  402     (setq chars (car elm)
  403           len (length chars)
  404           syntax (nth 1 elm)
  405           category (nth 2 elm)
  406           i 0)
  407     (while (< i len)
            ;; On "-", the next character is the end of a range.  CH is left
            ;; alone: the inner loop already advanced it one past the
            ;; previous character, so the range resumes from there.
  408       (if (= (aref chars i) ?-)
  409           (setq i (1+ i)
  410                 to (aref chars i))
  411         (setq ch (aref chars i)
  412               to ch))
            ;; Apply the entry to every character from CH through TO.  The
            ;; syntax is only written when it is not "w".
  413       (while (<= ch to)
  414         (unless (string-equal syntax "w")
  415           (modify-syntax-entry ch syntax))
  416         (modify-category-entry ch category)
  417         (setq ch (1+ ch)))
  418       (setq i (1+ i)))
  419     (setq deflist (cdr deflist))))
 420
 421 ;; Vietnamese character set
 422
 423 ;; To make a word with Latin characters
 424 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l)
 425 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v)
 426
 427 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l)
 428 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v)
 429
 430 (let ((tbl (standard-case-table))
 431       (i 32))
 432   (while (< i 128)
 433     (let* ((char (decode-char 'vietnamese-viscii-upper i))
 434            (charl (decode-char 'vietnamese-viscii-lower i))
 435            (uc (encode-char char 'ucs))
 436            (lc (encode-char charl 'ucs)))
 437       (set-case-syntax-pair char (decode-char 'vietnamese-viscii-lower i)
 438                             tbl)
 439       (if uc (modify-category-entry uc ?v))
 440       (if lc (modify-category-entry lc ?v)))
 441     (setq i (1+ i))))
 442
 443 (let ((tbl (standard-case-table)) c)
 444
 445 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
 446 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
 447 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
 448 ;; Thus we have to check language-environment to handle casing
 449 ;; correctly.  Currently only I<->i is available.
 450
  451   ;; Latin Extended-A, Latin Extended-B
  452   (setq c #x0100)
  453   (while (<= c #x0233)
          ;; Up to #x012e and in #x014a-#x0177, an even code C is paired
          ;; with C+1; the explicit calls below show the argument order is
          ;; (UPPER LOWER TABLE).
  454     (and (or (<= c #x012e)
  455              (and (>= c #x014a) (<= c #x0177)))
  456          (zerop (% c 2))
  457          (set-case-syntax-pair c (1+ c) tbl))
          ;; In #x013a-#x0148 the pairing is shifted by one: C-1 is paired
          ;; with the even code C.
  458     (and (>= c #x013a)
  459          (<= c #x0148)
  460          (zerop (% c 2))
  461          (set-case-syntax-pair (1- c) c tbl))
  462     (setq c (1+ c)))
        ;; Pairs set individually rather than by the loop above.
  463   (set-case-syntax-pair ?Ĳ ?ĳ tbl)
  464   (set-case-syntax-pair ?Ĵ ?ĵ tbl)
  465   (set-case-syntax-pair ?Ķ ?ķ tbl)
  466   (set-case-syntax-pair ?Ÿ ?ÿ tbl)
  467   (set-case-syntax-pair ?Ź ?ź tbl)
  468   (set-case-syntax-pair ?Ż ?ż tbl)
  469   (set-case-syntax-pair ?Ž ?ž tbl)
 470
  471   ;; Latin Extended-B
        ;; Explicit pairs, one call per pair, written (UPPER LOWER TABLE).
  472   (set-case-syntax-pair ?Ɓ ?ɓ tbl)
  473   (set-case-syntax-pair ?Ƃ ?ƃ tbl)
  474   (set-case-syntax-pair ?Ƅ ?ƅ tbl)
  475   (set-case-syntax-pair ?Ɔ ?ɔ tbl)
  476   (set-case-syntax-pair ?Ƈ ?ƈ tbl)
  477   (set-case-syntax-pair ?Ɖ ?ɖ tbl)
  478   (set-case-syntax-pair ?Ɗ ?ɗ tbl)
  479   (set-case-syntax-pair ?Ƌ ?ƌ tbl)
  480   (set-case-syntax-pair ?Ǝ ?ǝ tbl)
  481   (set-case-syntax-pair ?Ə ?ə tbl)
  482   (set-case-syntax-pair ?Ɛ ?ɛ tbl)
  483   (set-case-syntax-pair ?Ƒ ?ƒ tbl)
  484   (set-case-syntax-pair ?Ɠ ?ɠ tbl)
  485   (set-case-syntax-pair ?Ɣ ?ɣ tbl)
  486   (set-case-syntax-pair ?Ɩ ?ɩ tbl)
  487   (set-case-syntax-pair ?Ɨ ?ɨ tbl)
  488   (set-case-syntax-pair ?Ƙ ?ƙ tbl)
  489   (set-case-syntax-pair ?Ɯ ?ɯ tbl)
  490   (set-case-syntax-pair ?Ɲ ?ɲ tbl)
  491   (set-case-syntax-pair ?Ɵ ?ɵ tbl)
  492   (set-case-syntax-pair ?Ơ ?ơ tbl)
  493   (set-case-syntax-pair ?Ƣ ?ƣ tbl)
  494   (set-case-syntax-pair ?Ƥ ?ƥ tbl)
  495   (set-case-syntax-pair ?Ʀ ?ʀ tbl)
  496   (set-case-syntax-pair ?Ƨ ?ƨ tbl)
  497   (set-case-syntax-pair ?Ʃ ?ʃ tbl)
  498   (set-case-syntax-pair ?Ƭ ?ƭ tbl)
  499   (set-case-syntax-pair ?Ʈ ?ʈ tbl)
  500   (set-case-syntax-pair ?Ư ?ư tbl)
  501   (set-case-syntax-pair ?Ʊ ?ʊ tbl)
  502   (set-case-syntax-pair ?Ʋ ?ʋ tbl)
  503   (set-case-syntax-pair ?Ƴ ?ƴ tbl)
  504   (set-case-syntax-pair ?Ƶ ?ƶ tbl)
  505   (set-case-syntax-pair ?Ʒ ?ʒ tbl)
  506   (set-case-syntax-pair ?Ƹ ?ƹ tbl)
  507   (set-case-syntax-pair ?Ƽ ?ƽ tbl)
        ;; In the next six calls two different first arguments share one
        ;; second argument (e.g. both Ǆ and ǅ are paired with ǆ).
  508   (set-case-syntax-pair ?Ǆ ?ǆ tbl)
  509   (set-case-syntax-pair ?ǅ ?ǆ tbl)
  510   (set-case-syntax-pair ?Ǉ ?ǉ tbl)
  511   (set-case-syntax-pair ?ǈ ?ǉ tbl)
  512   (set-case-syntax-pair ?Ǌ ?ǌ tbl)
  513   (set-case-syntax-pair ?ǋ ?ǌ tbl)
  514   (set-case-syntax-pair ?Ǎ ?ǎ tbl)
  515   (set-case-syntax-pair ?Ǐ ?ǐ tbl)
  516   (set-case-syntax-pair ?Ǒ ?ǒ tbl)
  517   (set-case-syntax-pair ?Ǔ ?ǔ tbl)
  518   (set-case-syntax-pair ?Ǖ ?ǖ tbl)
  519   (set-case-syntax-pair ?Ǘ ?ǘ tbl)
  520   (set-case-syntax-pair ?Ǚ ?ǚ tbl)
  521   (set-case-syntax-pair ?Ǜ ?ǜ tbl)
  522   (set-case-syntax-pair ?Ǟ ?ǟ tbl)
  523   (set-case-syntax-pair ?Ǡ ?ǡ tbl)
  524   (set-case-syntax-pair ?Ǣ ?ǣ tbl)
  525   (set-case-syntax-pair ?Ǥ ?ǥ tbl)
  526   (set-case-syntax-pair ?Ǧ ?ǧ tbl)
  527   (set-case-syntax-pair ?Ǩ ?ǩ tbl)
  528   (set-case-syntax-pair ?Ǫ ?ǫ tbl)
  529   (set-case-syntax-pair ?Ǭ ?ǭ tbl)
  530   (set-case-syntax-pair ?Ǯ ?ǯ tbl)
  531   ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
  532   (set-case-syntax-pair ?Ǳ ?ǳ tbl)
  533   (set-case-syntax-pair ?ǲ ?ǳ tbl)
  534   (set-case-syntax-pair ?Ǵ ?ǵ tbl)
  535   (set-case-syntax-pair ?Ƕ ?ƕ tbl)
  536   (set-case-syntax-pair ?Ƿ ?ƿ tbl)
  537   (set-case-syntax-pair ?Ǹ ?ǹ tbl)
  538   (set-case-syntax-pair ?Ǻ ?ǻ tbl)
  539   (set-case-syntax-pair ?Ǽ ?ǽ tbl)
  540   (set-case-syntax-pair ?Ǿ ?ǿ tbl)
  541   (set-case-syntax-pair ?Ȁ ?ȁ tbl)
  542   (set-case-syntax-pair ?Ȃ ?ȃ tbl)
  543   (set-case-syntax-pair ?Ȅ ?ȅ tbl)
  544   (set-case-syntax-pair ?Ȇ ?ȇ tbl)
  545   (set-case-syntax-pair ?Ȉ ?ȉ tbl)
  546   (set-case-syntax-pair ?Ȋ ?ȋ tbl)
  547   (set-case-syntax-pair ?Ȍ ?ȍ tbl)
  548   (set-case-syntax-pair ?Ȏ ?ȏ tbl)
  549   (set-case-syntax-pair ?Ȑ ?ȑ tbl)
  550   (set-case-syntax-pair ?Ȓ ?ȓ tbl)
  551   (set-case-syntax-pair ?Ȕ ?ȕ tbl)
  552   (set-case-syntax-pair ?Ȗ ?ȗ tbl)
  553   (set-case-syntax-pair ?Ș ?ș tbl)
  554   (set-case-syntax-pair ?Ț ?ț tbl)
  555   (set-case-syntax-pair ?Ȝ ?ȝ tbl)
  556   (set-case-syntax-pair ?Ȟ ?ȟ tbl)
  557   (set-case-syntax-pair ?Ȣ ?ȣ tbl)
  558   (set-case-syntax-pair ?Ȥ ?ȥ tbl)
  559   (set-case-syntax-pair ?Ȧ ?ȧ tbl)
  560   (set-case-syntax-pair ?Ȩ ?ȩ tbl)
  561   (set-case-syntax-pair ?Ȫ ?ȫ tbl)
  562   (set-case-syntax-pair ?Ȭ ?ȭ tbl)
  563   (set-case-syntax-pair ?Ȯ ?ȯ tbl)
  564   (set-case-syntax-pair ?Ȱ ?ȱ tbl)
  565   (set-case-syntax-pair ?Ȳ ?ȳ tbl)
 566
 567   ;; Latin Extended Additional
 568   (modify-category-entry '(#x1e00 . #x1ef9) ?l)
 569   (setq c #x1e00)
 570   (while (<= c #x1ef9)
 571     (and (zerop (% c 2))
 572          (or (<= c #x1e94) (>= c #x1ea0))
 573          (set-case-syntax-pair c (1+ c) tbl))
 574     (setq c (1+ c)))
 575
  576   ;; Greek
  577   (modify-category-entry '(#x0370 . #x03ff) ?g)
  578   (setq c #x0370)
  579   (while (<= c #x03ff)
          ;; #x0391-#x03a1 and #x03a3-#x03ab are paired with the code 32
          ;; above (#x03a2 is left out).
  580     (if (or (and (>= c #x0391) (<= c #x03a1))
  581             (and (>= c #x03a3) (<= c #x03ab)))
  582         (set-case-syntax-pair c (+ c 32) tbl))
          ;; Even codes in #x03da-#x03ee are paired with the next code.
  583     (and (>= c #x03da)
  584          (<= c #x03ee)
  585          (zerop (% c 2))
  586          (set-case-syntax-pair c (1+ c) tbl))
  587     (setq c (1+ c)))
        ;; Pairs set individually rather than by the loop above.
  588   (set-case-syntax-pair ?Ά ?ά tbl)
  589   (set-case-syntax-pair ?Έ ?έ tbl)
  590   (set-case-syntax-pair ?Ή ?ή tbl)
  591   (set-case-syntax-pair ?Ί ?ί tbl)
  592   (set-case-syntax-pair ?Ό ?ό tbl)
  593   (set-case-syntax-pair ?Ύ ?ύ tbl)
  594   (set-case-syntax-pair ?Ώ ?ώ tbl)
 595
 596   ;; Armenian
 597   (setq c #x531)
 598   (while (<= c #x556)
 599     (set-case-syntax-pair c (+ c #x30) tbl)
 600     (setq c (1+ c)))
 601
 602   ;; Greek Extended
 603   (modify-category-entry '(#x1f00 . #x1fff) ?g)
 604   (setq c #x1f00)
 605   (while (<= c #x1fff)
 606     (and (<= (logand c #x000f) 7)
 607          (<= c #x1fa7)
 608          (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
 609          (/= (logand c #x00f0) 7)
 610          (set-case-syntax-pair (+ c 8) c tbl))
 611     (setq c (1+ c)))
 612   (set-case-syntax-pair ?Ᾰ ?ᾰ tbl)
 613   (set-case-syntax-pair ?Ᾱ ?ᾱ tbl)
 614   (set-case-syntax-pair ?Ὰ ?ὰ tbl)
 615   (set-case-syntax-pair ?Ά ?ά tbl)
 616   (set-case-syntax-pair ?ᾼ ?ᾳ tbl)
 617   (set-case-syntax-pair ?Ὲ ?ὲ tbl)
 618   (set-case-syntax-pair ?Έ ?έ tbl)
 619   (set-case-syntax-pair ?Ὴ ?ὴ tbl)
 620   (set-case-syntax-pair ?Ή ?ή tbl)
 621   (set-case-syntax-pair ?ῌ ?ῃ tbl)
 622   (set-case-syntax-pair ?Ῐ ?ῐ tbl)
 623   (set-case-syntax-pair ?Ῑ ?ῑ tbl)
 624   (set-case-syntax-pair ?Ὶ ?ὶ tbl)
 625   (set-case-syntax-pair ?Ί ?ί tbl)
 626   (set-case-syntax-pair ?Ῠ ?ῠ tbl)
 627   (set-case-syntax-pair ?Ῡ ?ῡ tbl)
 628   (set-case-syntax-pair ?Ὺ ?ὺ tbl)
 629   (set-case-syntax-pair ?Ύ ?ύ tbl)
 630   (set-case-syntax-pair ?Ῥ ?ῥ tbl)
 631   (set-case-syntax-pair ?Ὸ ?ὸ tbl)
 632   (set-case-syntax-pair ?Ό ?ό tbl)
 633   (set-case-syntax-pair ?Ὼ ?ὼ tbl)
 634   (set-case-syntax-pair ?Ώ ?ώ tbl)
 635   (set-case-syntax-pair ?ῼ ?ῳ tbl)
 636
  637   ;; cyrillic
  638   (modify-category-entry '(#x0400 . #x04FF) ?y)
  639   (setq c #x0400)
  640   (while (<= c #x04ff)
          ;; #x0400-#x040f are paired with the code 80 above.
  641     (and (>= c #x0400)
  642          (<= c #x040f)
  643          (set-case-syntax-pair c (+ c 80) tbl))
          ;; #x0410-#x042f are paired with the code 32 above.
  644     (and (>= c #x0410)
  645          (<= c #x042f)
  646          (set-case-syntax-pair c (+ c 32) tbl))
          ;; Even codes in the three ranges below are paired with the next code.
  647     (and (zerop (% c 2))
  648          (or (and (>= c #x0460) (<= c #x0480))
  649              (and (>= c #x048c) (<= c #x04be))
  650              (and (>= c #x04d0) (<= c #x04f4)))
  651          (set-case-syntax-pair c (1+ c) tbl))
  652     (setq c (1+ c)))
        ;; Pairs set individually rather than by the loop above.
  653   (set-case-syntax-pair ?Ӂ ?ӂ tbl)
  654   (set-case-syntax-pair ?Ӄ ?ӄ tbl)
  655   (set-case-syntax-pair ?Ӈ ?ӈ tbl)
  656   (set-case-syntax-pair ?Ӌ ?ӌ tbl)
  657   (set-case-syntax-pair ?Ӹ ?ӹ tbl)
 658
  659   ;; general punctuation
        ;; C is set once and not reset between the loops: each loop continues
        ;; from the code where the previous one stopped.
  660   (setq c #x2000)
  661   (while (<= c #x200b)              ; #x2000-#x200b: whitespace syntax
  662     (set-case-syntax c " " tbl)
  663     (setq c (1+ c)))
  664   (while (<= c #x200F)              ; #x200c-#x200f: punctuation syntax
  665     (set-case-syntax c "." tbl)
  666     (setq c (1+ c)))
  667   ;; Fixme: These aren't all right:
  668   (while (<= c #x2027)              ; #x2010-#x2027: symbol syntax
  669     (set-case-syntax c "_" tbl)
  670     (setq c (1+ c)))
  671   (while (<= c #x206F)              ; #x2028-#x206f: punctuation syntax
  672     (set-case-syntax c "." tbl)
  673     (setq c (1+ c)))
 674
 675   ;; Roman numerals
 676   (setq c #x2160)
 677   (while (<= c #x216f)
 678     (set-case-syntax-pair c (+ c #x10) tbl)
 679     (setq c (1+ c)))
 680
 681   ;; Arrows
 682   (setq c #x2190)
 683   (while (<= c #c21FF)
 684     (set-case-syntax-pair c "." tbl)
 685     (setq c (1+ c)))
 686   ;; Mathematical Operators
 687   (while (<= c #x22FF)
 688     (set-case-syntax-pair c "." tbl)
 689     (setq c (1+ c)))
 690   ;; Miscellaneous Technical
 691   (while (<= c #x23FF)
 692     (set-case-syntax-pair c "." tbl)
 693     (setq c (1+ c)))
 694   ;; Control Pictures
 695   (while (<= c #x243F)
 696     (set-case-syntax-pair c "_" tbl)
 697     (set c (1+ c)))
 698
 699   ;; Circled Latin
 700   (setq c #x24b6)
 701   (while (<= c #x24cf)
 702     (set-case-syntax-pair c (+ c 26) tbl)
 703     (modify-category-entry c ?l)
 704     (modify-category-entry (+ c 26) ?l)
 705     (setq c (1+ c)))
 706
 707   ;; Fullwidth Latin
 708   (setq c #xff21)
 709   (while (<= c #xff3a)
 710     (set-case-syntax-pair c (+ c #x20) tbl)
 711     (modify-category-entry c ?l)
 712     (modify-category-entry (+ c #x20) ?l)
 713     (setq c (1+ c)))
 714
  715   ;; Combining diacritics
        ;; `^' is the "Combining diacritic or mark" category defined above.
  716   (modify-category-entry '(#x300 . #x362) ?^)
  717   ;; Combining marks
  718   (modify-category-entry '(#x20d0 . #x20e3) ?^)
 719
 720   ;; Fixme: syntax for symbols &c
 721   )
 722 \f
 723 ;; For each character set, put the information of the most proper
 724 ;; coding system to encode it by `preferred-coding-system' property.
 725
 726 ;; Fixme: should this be junked?
 727 (let ((l '((latin-iso8859-1     . iso-latin-1)
 728            (latin-iso8859-2     . iso-latin-2)
 729            (latin-iso8859-3     . iso-latin-3)
 730            (latin-iso8859-4     . iso-latin-4)
 731            (thai-tis620         . thai-tis620)
 732            (greek-iso8859-7     . greek-iso-8bit)
 733            (arabic-iso8859-6    . iso-2022-7bit)
 734            (hebrew-iso8859-8    . hebrew-iso-8bit)
 735            (katakana-jisx0201   . japanese-shift-jis)
 736            (latin-jisx0201      . japanese-shift-jis)
 737            (cyrillic-iso8859-5  . cyrillic-iso-8bit)
 738            (latin-iso8859-9     . iso-latin-5)
 739            (japanese-jisx0208-1978 . iso-2022-jp)
 740            (chinese-gb2312      . cn-gb-2312)
 741            (japanese-jisx0208   . iso-2022-jp)
 742            (korean-ksc5601      . iso-2022-kr)
 743            (japanese-jisx0212   . iso-2022-jp)
 744            (chinese-cns11643-1  . iso-2022-cn)
 745            (chinese-cns11643-2  . iso-2022-cn)
 746            (chinese-big5-1      . chinese-big5)
 747            (chinese-big5-2      . chinese-big5)
 748            (chinese-sisheng     . iso-2022-7bit)
 749            (ipa                 . iso-2022-7bit)
 750            (vietnamese-viscii-lower . vietnamese-viscii)
 751            (vietnamese-viscii-upper . vietnamese-viscii)
 752            (arabic-digit        . iso-2022-7bit)
 753            (arabic-1-column     . iso-2022-7bit)
 754            (lao                 . lao)
 755            (arabic-2-column     . iso-2022-7bit)
 756            (indian-is13194      . devanagari)
 757            (indian-glyph        . devanagari)
 758            (tibetan-1-column    . tibetan)
 759            (ethiopic            . iso-2022-7bit)
 760            (chinese-cns11643-3  . iso-2022-cn)
 761            (chinese-cns11643-4  . iso-2022-cn)
 762            (chinese-cns11643-5  . iso-2022-cn)
 763            (chinese-cns11643-6  . iso-2022-cn)
 764            (chinese-cns11643-7  . iso-2022-cn)
 765            (indian-2-column     . devanagari)
 766            (tibetan             . tibetan)
 767            (latin-iso8859-14    . iso-latin-8)
 768            (latin-iso8859-15    . iso-latin-9))))
 769   (while l
 770     (put-charset-property (car (car l)) 'preferred-coding-system (cdr (car l)))
 771     (setq l (cdr l))))
 772
 773 \f
 774 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
 775 ;; SPACE and NEWLINE are already set.  Also put `nospace-between-words'
 776 ;; property on the charsets.
 777 (let ((l '(katakana-jisx0201
 778            japanese-jisx0208 japanese-jisx0212
 779            chinese-gb2312 chinese-big5-1 chinese-big5-2)))
 780   (while l
 781     ;;(aset auto-fill-chars (make-char (car l)) t)
 782     (put-charset-property (car l) 'nospace-between-words t)
 783     (setq l (cdr l))))
 784
 785 \f
 786 ;; CJK double width characters.
 787 (let ((l '((#x1100 . #x11FF)
 788            (#x2E80 . #x9FAF)
 789            (#xAC00 . #xD7AF)
 790            (#xF900 . #xFAFF)
 791            (#xFE30 . #xFE4F)
 792            (#xFF00 . #xFF5F)
 793            (#xFFE0 . #xFFEF))))
 794   (dolist (elt l)
 795     (set-char-table-range char-width-table
 796                           (cons (car elt) (cdr elt))
 797                           2)))
 798 ;; Fixme: Doing this affects non-CJK characters through unification,
 799 ;; but presumably CJK users expect those characters to be
 800 ;; double-width when using these charsets.
 801 ;; (map-charset-chars
 802 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 803 ;;  'japanese-jisx0208)
 804 ;; (map-charset-chars
 805 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 806 ;;  'japanese-jisx0212)
 807 ;; (map-charset-chars
 808 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 809 ;;  'japanese-jisx0213-1)
 810 ;; (map-charset-chars
 811 ;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
 812 ;;  'japanese-jisx0213-2)
 813 ;; (map-charset-chars
 814 ;;  (lambda (range ignore) (set-char-table-range char-width-table range 2))
 815 ;;  'korean-ksc5601)
 816
 817 ;; Other double width
 818 (map-charset-chars
 819  (lambda (range ignore) (set-char-table-range char-width-table range 2))
 820  'ethiopic)
 821 (map-charset-chars
 822  (lambda (range ignore) (set-char-table-range char-width-table range 2))
 823  'tibetan)
 824 (map-charset-chars
 825  (lambda (range ignore) (set-char-table-range char-width-table range 2))
 826  'indian-2-column)
 827 (map-charset-chars
 828  (lambda (range ignore) (set-char-table-range char-width-table range 2))
 829  'arabic-2-column)
 830
 831 (optimize-char-table (standard-case-table))
 832 (optimize-char-table char-width-table)
 833 (optimize-char-table (standard-category-table))
 834 (optimize-char-table (standard-syntax-table))
 835
 836 ;; The Unicode blocks actually extend past some of these ranges with
 837 ;; undefined codepoints.
 838 (let ((script-list nil))
 839   (dolist
 840       (elt
 841        '((#x0000 #x007F latin)
 842          (#x00A0 #x036F latin)
 843          (#x0370 #x03E1 greek)
 844          (#x03E2 #x03EF coptic)
 845          (#x03F0 #x03F3 greek)
 846          (#x0400 #x04FF cyrillic)
 847          (#x0530 #x058F armenian)
 848          (#x0590 #x05FF hebrew)
 849          (#x0600 #x06FF arabic)
 850          (#x0700 #x074F syriac)
 851          (#x0780 #x07BF thaana)
 852          (#x0900 #x097F devanagari)
 853          (#x0980 #x09FF bengali)
 854          (#x0A00 #x0A7F gurmukhi)
 855          (#x0A80 #x0AFF gujarati)
 856          (#x0B00 #x0B7F oriya)
 857          (#x0B80 #x0BFF tamil)
 858          (#x0C00 #x0C7F telugu)
 859          (#x0C80 #x0CFF kannada)
 860          (#x0D00 #x0D7F malayalam)
 861          (#x0D80 #x0DFF sinhala)
 862          (#x0E00 #x0E5F thai)
 863          (#x0E80 #x0EDF lao)
 864          (#x0F00 #x0FFF tibetan)
 865          (#x1000 #x105F myanmar)
 866          (#x10A0 #x10FF georgian)
 867          (#x1100 #x11FF hangul)
 868          (#x1200 #x137F ethiopic)
 869          (#x13A0 #x13FF cherokee)
 870          (#x1400 #x167F canadian-aboriginal)
 871          (#x1680 #x169F ogham)
 872          (#x16A0 #x16FF runic)
 873          (#x1780 #x17FF khmer)
 874          (#x1800 #x18AF mongolian)
 875          (#x1E00 #x1EFF latin)
 876          (#x1F00 #x1FFF greek)
 877          (#x20A0 #x20AF currency)
 878          (#x2800 #x28FF braille)
 879          (#x2E80 #x2FDF han)
 880          (#x2FF0 #x2FFF ideographic-description)
 881          (#x3000 #x303F cjk-misc)
 882          (#x3040 #x30FF kana)
 883          (#x3100 #x312F bopomofo)
 884          (#x3130 #x318F hangul)
 885          (#x3190 #x319F kanbun)
 886          (#x31A0 #x31BF bopomofo)
 887          (#x3400 #x9FAF han)
 888          (#xA000 #xA4CF yi)
 889          (#xAC00 #xD7AF hangul)
 890          (#xF900 #xFA5F han)
 891          (#xFB1D #xFB4F hebrew)
 892          (#xFB50 #xFDFF arabic)
 893          (#xFE70 #xFEFC arabic)
 894          (#xFF00 #xFF5F cjk-misc)
 895          (#xFF61 #xFF9F kana)
 896          (#xFFE0 #xFFE6 cjk-misc)
 897          (#x20000 #x2AFFF han)
 898          (#x2F800 #x2FFFF han)))
 899     (set-char-table-range char-script-table
 900                           (cons (car elt) (nth 1 elt)) (nth 2 elt))
 901     (or (memq (nth 2 elt) script-list)
 902         (setq script-list (cons (nth 2 elt) script-list))))
 903   (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
 904
 905 \f
 906 ;;; Setting word boundary.
 907
 908 (defun next-word-boundary-han (pos limit)
 909   (if (<= pos limit)
 910       (save-excursion
 911         (goto-char pos)
 912         (looking-at "\\cC+")
 913         (goto-char (match-end 0))
 914         (if (looking-at "\\cH+")
 915             (goto-char (match-end 0)))
 916         (point))
 917     (while (and (> pos limit)
 918                 (eq (aref char-script-table (char-after (1- pos))) 'han))
 919       (setq pos (1- pos)))
 920     pos))
 921
 922 (defun next-word-boundary-kana (pos limit)
 923   (if (<= pos limit)
 924       (save-excursion
 925         (goto-char pos)
 926         (if (looking-at "\\cK+")
 927             (goto-char (match-end 0)))
 928         (if (looking-at "\\cH+")
 929             (goto-char (match-end 0)))
 930         (point))
 931     (let ((category-set (char-category-set (char-after pos)))
 932           category)
 933       (if (aref category-set ?K)
 934           (while (and (> pos limit)
 935                       (aref (char-category-set (char-after (1- pos))) ?K))
 936             (setq pos (1- pos)))
 937         (while (and (> pos limit)
 938                     (aref (setq category-set
 939                                 (char-category-set (char-after (1- pos)))) ?H))
 940           (setq pos (1- pos)))
 941         (setq category (cond ((aref category-set ?C) ?C)
 942                              ((aref category-set ?K) ?K)
 943                              ((aref category-set ?A) ?A)))
 944         (when category
 945           (setq pos (1- pos))
 946           (while (and (> pos limit)
 947                       (aref (char-category-set (char-after (1- pos)))
 948                             category))
 949             (setq pos (1- pos)))))
 950       pos)))
 951
 952 (map-char-table
 953  #'(lambda (char script)
 954      (cond ((eq script 'han)
 955             (set-char-table-range next-word-boundary-function-table
 956                                   char #'next-word-boundary-han))
 957            ((eq script 'kana)
 958             (set-char-table-range next-word-boundary-function-table
 959                                   char #'next-word-boundary-kana))))
 960  char-script-table)
 961
 962 (setq word-combining-categories
 963       '((?l . ?l)))
 964
 965 (setq word-separating-categories        ;  (2-byte character sets)
 966       '((?A . ?K)                       ; Alpha numeric - Katakana
 967         (?A . ?C)                       ; Alpha numeric - Chinese
 968         (?H . ?A)                       ; Hiragana - Alpha numeric
 969         (?H . ?K)                       ; Hiragana - Katakana
 970         (?H . ?C)                       ; Hiragana - Chinese
 971         (?K . ?A)                       ; Katakana - Alpha numeric
 972         (?K . ?C)                       ; Katakana - Chinese
 973         (?C . ?A)                       ; Chinese - Alpha numeric
 974         (?C . ?K)                       ; Chinese - Katakana
 975         ))
 976
 977 ;;; Local Variables:
 978 ;;; coding: utf-8-emacs
 979 ;;; End:
 980
 981 ;;; characters.el ends here