Update AIST copyright years.
[bpt/emacs.git] / lisp / language / chinese.el
CommitLineData
60370d40 1;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*-
4ed46869 2
d4877ac1
GM
3;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
4;; Free Software Foundation, Inc.
7976eda0
KH
5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6;; 2005, 2006
eaa61218
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
4ed46869
KH
9
10;; Keywords: multilingual, Chinese
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
369314dc 25;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
26;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27;; Boston, MA 02110-1301, USA.
4ed46869
KH
28
29;;; Commentary:
30
31;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are
32;; supported.
33
34;;; Code:
35
36;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37;;; Chinese (general)
38;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39
40(make-coding-system
4138c943 41 'iso-2022-cn 2 ?C
285aac85 42 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)."
4138c943
KH
43 '(ascii
44 (nil chinese-gb2312 chinese-cns11643-1)
45 (nil chinese-cns11643-2)
46 nil
47 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
48 init-bol)
49 '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2)
50 (mime-charset . iso-2022-cn)))
51
52(define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn)
53
54(make-coding-system
55 'iso-2022-cn-ext 2 ?C
285aac85 56 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)."
4ed46869
KH
57 '(ascii
58 (nil chinese-gb2312 chinese-cns11643-1)
59 (nil chinese-cns11643-2)
60 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
61 chinese-cns11643-6 chinese-cns11643-7)
f3f18123 62 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
a18aa841 63 init-bol)
4138c943
KH
64 '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2
65 chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
66 chinese-cns11643-6 chinese-cns11643-7)
67 (mime-charset . iso-2022-cn-ext)))
68
335a7ad7 69\f
4ed46869 70;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
a1506d29 71;;; Chinese GB2312 (simplified)
4ed46869
KH
72;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
73
74(make-coding-system
4b9121fc 75 'chinese-iso-8bit 2 ?c
285aac85 76 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)."
4138c943
KH
77 '(ascii chinese-gb2312 nil nil
78 nil ascii-eol ascii-cntl nil nil nil nil)
79 '((safe-charsets ascii chinese-gb2312)
ff890e66 80 (mime-charset . gb2312)))
4ed46869 81
71eabd24
RS
82(define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit)
83(define-coding-system-alias 'euc-china 'chinese-iso-8bit)
a18aa841 84(define-coding-system-alias 'euc-cn 'chinese-iso-8bit)
4f35555a
KH
85(define-coding-system-alias 'cn-gb 'chinese-iso-8bit)
86(define-coding-system-alias 'gb2312 'chinese-iso-8bit)
6eac8f52 87(define-coding-system-alias 'cp936 'chinese-iso-8bit)
f3f18123 88
4ed46869 89(make-coding-system
4b9121fc 90 'chinese-hz 0 ?z
285aac85 91 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)."
a18aa841 92 nil
4138c943
KH
93 '((safe-charsets ascii chinese-gb2312)
94 (mime-charset . hz-gb-2312)
95 (post-read-conversion . post-read-decode-hz)
96 (pre-write-conversion . pre-write-encode-hz)))
f3f18123 97
71eabd24
RS
98(define-coding-system-alias 'hz-gb-2312 'chinese-hz)
99(define-coding-system-alias 'hz 'chinese-hz)
4ed46869
KH
100
(defun post-read-decode-hz (len)
  "Post-read conversion for the `chinese-hz' coding system.
Run `decode-hz-region' on the LEN characters that follow point and
return whatever that function returns.  The buffer's modified flag is
put back to the state it had on entry, and `last-coding-system-used'
is bound locally so the decoding cannot change the caller's value."
  (let* ((start (point))
         (was-modified (buffer-modified-p))
         ;; Shield the global value from anything the decoder sets.
         (last-coding-system-used nil)
         (result (decode-hz-region start (+ start len))))
    (set-buffer-modified-p was-modified)
    result))
4ed46869
KH
108
(defun pre-write-encode-hz (from to)
  "Pre-write conversion for the `chinese-hz' coding system.
FROM is either a string, or a buffer position paired with TO that
delimits text in the current buffer.  The text is copied into a newly
generated buffer named \" *temp*\", that buffer is made current, and
its whole contents are passed through `encode-hz-region'.
Always return nil; the new buffer is left current."
  (let ((source-buffer (current-buffer))
        (work-buffer (generate-new-buffer " *temp*")))
    (set-buffer work-buffer)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source-buffer from to)))
    ;; Keep the encoder from altering the caller's
    ;; `last-coding-system-used'.
    (let ((last-coding-system-used nil))
      (encode-hz-region 1 (point-max)))
    nil))
118
4ed46869 119(set-language-info-alist
a564ccf9 120 "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng)
4138c943
KH
121 (coding-system chinese-iso-8bit iso-2022-cn chinese-hz)
122 (coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn)
a564ccf9
KH
123 (input-method . "chinese-py-punct")
124 (features china-util)
fab8252e 125 (sample-text . "Chinese (\e$AVPND\e(B,\e$AFUM(;0\e(B,\e$A::So\e(B) \e$ADc:C\e(B")
04c00c85
EZ
126 (documentation . "Support for Chinese GB2312 character set.")
127 (tutorial . "TUTORIAL.cn"))
4138c943 128 '("Chinese"))
4ed46869
KH
129
130;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
131;; Chinese BIG5 (traditional)
132;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
133
134(make-coding-system
285aac85
WL
135 'chinese-big5 3 ?B
136 "BIG5 8-bit encoding for Chinese (MIME:Big5)."
4138c943
KH
137 nil
138 '((safe-charsets ascii chinese-big5-1 chinese-big5-2)
ff890e66 139 (mime-charset . big5)
d4c97509
KH
140 (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char)
141 (chinese-big5-2 "BIG5" encode-big5-char))))
4ed46869 142
71eabd24
RS
143(define-coding-system-alias 'big5 'chinese-big5)
144(define-coding-system-alias 'cn-big5 'chinese-big5)
6eac8f52 145(define-coding-system-alias 'cp950 'chinese-big5)
f3f18123 146
4ed46869
KH
147;; Big5 font requires special encoding.
;; CCL program that turns an Emacs (charset, position code 1, position
;; code 2) triple for chinese-big5-1 / chinese-big5-2 into the two code
;; points expected by a Big5 font.  It first flattens the two position
;; codes into one linear index, offsets that index for chinese-big5-2,
;; and then splits it again into rows of 157 code points.
148(define-ccl-program ccl-encode-big5-font
149 `(0
150 ;; In: R0:chinese-big5-1 or chinese-big5-2
151 ;; R1:position code 1
152 ;; R2:position code 2
153 ;; Out: R1:font code point 1
154 ;; R2:font code point 2
 ;; r2 <- linear index: (code1 - #x21) * 94 + (code2 - #x21).
155 ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21))
 ;; Characters of chinese-big5-2 are shifted by 6280 positions
 ;; (presumably the number of code points in chinese-big5-1 -- confirm).
156 (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
 ;; First font byte: row number (157 code points per row) based at #xA1.
157 (r1 = ((r2 / 157) + ?\xA1))
 ;; Second font byte: column in the row; columns below #x3F are based
 ;; at #x40, the remaining ones are shifted by #x62.
158 (r2 %= 157)
159 (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62))))
160 "CCL program to encode a Big5 code to code point of Big5 font.")
161
;; Register the Big5 font encoder: prepend a ("big5" . PROGRAM) pair
;; to the alist of CCL font encoders.
(push (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)
164
4ed46869 165(set-language-info-alist
a564ccf9 166 "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2)
4138c943
KH
167 (coding-system chinese-big5 chinese-iso-7bit)
168 (coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit)
a564ccf9
KH
169 (input-method . "chinese-py-punct-b5")
170 (features china-util)
fab8252e 171 (sample-text . "Cantonese (\e$(0GnM$\e(B,\e$(0N]0*Hd\e(B) \e$(0*/=(\e(B, \e$(0+$)p\e(B")
04c00c85
EZ
172 (documentation . "Support for Chinese Big5 character set.")
173 (tutorial . "TUTORIAL.zh"))
4138c943 174 '("Chinese"))
4ed46869
KH
175
176;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
177;; Chinese CNS11643 (traditional)
178;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
179
285aac85
WL
(defvar big5-to-cns (make-translation-table)
  "Translation table for encoding to `euc-tw'.")
;; china-util may already have been loaded and registered this table;
;; only register it here when the symbol has no `translation-table'
;; property yet.
(when (not (get 'big5-to-cns 'translation-table))
  (define-translation-table 'big5-to-cns big5-to-cns))
185
;; CCL decoder for EUC-TW.  Bytes below #x80 are copied through as
;; ASCII.  A #x8E byte starts a four-byte sequence: a plane byte in
;; #xA1..#xA7 followed by two bytes that must each be >= #xA1.  Any
;; other byte >= #xA1 starts a two-byte sequence that is decoded as CNS
;; plane 1.  Whenever a byte fails these checks, the bytes read so far
;; are written out unchanged and the loop continues.
186(define-ccl-program ccl-decode-euc-tw
187 ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding;
188 ;; CNS planes 2 to 7 always need four bytes. In internal encoding of
189 ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need
190 ;; four bytes. Thus a buffer magnification value of 2 (for both
191 ;; encoding and decoding) is sufficient.
192 `(2
193 ;; we don't have enough registers to hold all charset-ids
194 ((r4 = ,(charset-id 'chinese-cns11643-1))
195 (r5 = ,(charset-id 'chinese-cns11643-2))
196 (r6 = ,(charset-id 'chinese-cns11643-3))
197 (loop
198 (read-if (r0 < #x80)
199 ;; ASCII
200 (write-repeat r0)
201 ;; not ASCII
202 (if (r0 == #x8E)
203 ;; single shift
204 (read-if (r1 < #xA1)
205 ;; invalid byte
206 ((write r0)
207 (write-repeat r1))
208 (if (r1 > #xA7)
209 ;; invalid plane
210 ((write r0)
211 (write-repeat r1))
212 ;; OK, we have a plane
213 (read-if (r2 < #xA1)
214 ;; invalid first byte
215 ((write r0 r1)
216 (write-repeat r2))
217 (read-if (r3 < #xA1)
218 ;; invalid second byte
219 ((write r0 r1 r2)
220 (write-repeat r3))
221 ;; CNS 1-7, finally
 ;; Replace the plane byte (#xA1..#xA7) in r1 by the charset id
 ;; of the corresponding CNS plane 1..7.
222 ((branch (r1 - #xA1)
223 (r1 = r4)
224 (r1 = r5)
225 (r1 = r6)
226 (r1 = ,(charset-id 'chinese-cns11643-4))
227 (r1 = ,(charset-id 'chinese-cns11643-5))
228 (r1 = ,(charset-id 'chinese-cns11643-6))
229 (r1 = ,(charset-id 'chinese-cns11643-7)))
 ;; Pack the two code bytes, each reduced by #x80, into a single
 ;; value: (first << 7) + second.
230 (r2 = ((((r2 - #x80) << 7) + r3) - #x80))
231 (write-multibyte-character r1 r2)
232 (repeat))))))
233 ;; standard EUC
234 (if (r0 < #xA1)
235 ;; invalid first byte
236 (write-repeat r0)
237 (read-if (r1 < #xA1)
238 ;; invalid second byte
239 ((write r0)
240 (write-repeat r1))
241 ;; CNS 1, finally
 ;; Same packing as above; r4 holds the charset id of plane 1.
242 ((r1 = ((((r0 - #x80) << 7) + r1) - #x80))
243 (write-multibyte-character r4 r1)
244 (repeat)))))))))
245 "CCL program to decode EUC-TW encoding."
246)
247
;; CCL encoder for EUC-TW.  Each character is read as a (charset, code)
;; pair.  ASCII is copied through.  Characters of the two Big5 charsets
;; are first mapped through the `big5-to-cns' translation table.  A
;; character of CNS plane 1 is then written as two bytes, and one of
;; planes 2..7 as #x8E, the plane byte (#xA2..#xA7) and two bytes.  A
;; character of any other charset is written as the single byte #xFF.
248(define-ccl-program ccl-encode-euc-tw
249 `(2
250 ;; we don't have enough registers to hold all charset-ids
251 ((r2 = ,(charset-id 'ascii))
252 (r3 = ,(charset-id 'chinese-big5-1))
253 (r4 = ,(charset-id 'chinese-big5-2))
254 (r5 = ,(charset-id 'chinese-cns11643-1))
255 (r6 = ,(charset-id 'chinese-cns11643-2))
256 (loop
257 (read-multibyte-character r0 r1)
258 (if (r0 == r2)
259 (write-repeat r1)
260 (;; Big 5 encoded characters are first translated to CNS
261 (if (r0 == r3)
262 (translate-character big5-to-cns r0 r1)
263 (if (r0 == r4)
264 (translate-character big5-to-cns r0 r1)))
 ;; Replace the charset id in r0 by the EUC-TW plane byte
 ;; (#xA1 for plane 1 up to #xA7 for plane 7).
265 (if (r0 == r5)
266 (r0 = #xA1)
267 (if (r0 == r6)
268 (r0 = #xA2)
269 (if (r0 == ,(charset-id 'chinese-cns11643-3))
270 (r0 = #xA3)
271 (if (r0 == ,(charset-id 'chinese-cns11643-4))
272 (r0 = #xA4)
273 (if (r0 == ,(charset-id 'chinese-cns11643-5))
274 (r0 = #xA5)
275 (if (r0 == ,(charset-id 'chinese-cns11643-6))
276 (r0 = #xA6)
277 (if (r0 == ,(charset-id 'chinese-cns11643-7))
278 (r0 = #xA7)
279 ;; not CNS. We use a dummy character which
280 ;; can't occur in EUC-TW encoding to indicate
281 ;; this.
282 (write-repeat #xFF))))))))))
 ;; Plane 1 uses the short two-byte form; every other plane is
 ;; prefixed with #x8E and its plane byte.
283 (if (r0 != #xA1)
284 ;; single shift and CNS plane
285 ((write #x8E)
286 (write r0)))
 ;; Split the code in r1 into its upper part (r1 >> 7) and its low
 ;; seven bits, each written with #x80 added.
287 (write ((r1 >> 7) + #x80))
288 (write ((r1 % #x80) + #x80))
289 (repeat))))
290 "CCL program to encode EUC-TW encoding."
291)
292
(defun euc-tw-pre-write-conversion (beg end)
  "One-shot pre-write hook of `euc-tw' whose job is to load `china-util'.
BEG and END are accepted but not used.  After requiring `china-util'
the function clears the `pre-write-conversion' property of `euc-tw',
so it is not called again for that coding system.  Always return nil."
  (prog1 nil
    ;; The translation table used by the encoder must be available
    ;; before the first write.
    (require 'china-util)
    ;; Unhook ourselves; one load is enough.
    (coding-system-put 'euc-tw 'pre-write-conversion nil)))
300
;; Register the `euc-tw' coding system.  Decoding and encoding are done
;; by the CCL programs `ccl-decode-euc-tw' and `ccl-encode-euc-tw'.
;; The safe charsets are ASCII, both Big5 charsets and CNS planes 1-7.
;; The pre-write-conversion entry is `euc-tw-pre-write-conversion',
;; which loads china-util and then removes itself.
301(make-coding-system
302 'euc-tw 4 ?Z
303 "ISO 2022 based EUC encoding for Chinese CNS11643.
304Big5 encoding is accepted for input also (which is then converted to CNS)."
305 '(ccl-decode-euc-tw . ccl-encode-euc-tw)
306 '((safe-charsets ascii
307 chinese-big5-1
308 chinese-big5-2
309 chinese-cns11643-1
310 chinese-cns11643-2
311 chinese-cns11643-3
312 chinese-cns11643-4
313 chinese-cns11643-5
314 chinese-cns11643-6
315 chinese-cns11643-7)
316 (valid-codes (0 . 255))
317 (pre-write-conversion . euc-tw-pre-write-conversion)))
318
319(define-coding-system-alias 'euc-taiwan 'euc-tw)
320
4ed46869 321(set-language-info-alist
a564ccf9 322 "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2
4138c943
KH
323 chinese-cns11643-3 chinese-cns11643-4
324 chinese-cns11643-5 chinese-cns11643-6
325 chinese-cns11643-7)
285aac85
WL
326 (coding-system iso-2022-cn euc-tw)
327 (coding-priority iso-2022-cn euc-tw chinese-big5
328 chinese-iso-8bit)
a564ccf9
KH
329 (features china-util)
330 (input-method . "chinese-cns-quick")
285aac85 331 (documentation . "\
9da20928 332Support for Chinese CNS character sets. Note that the EUC-TW coding system
285aac85 333accepts Big5 for input also (which is then converted to CNS)."))
4138c943 334 '("Chinese"))
4ed46869 335
9da20928
DL
;; Language environment "Chinese-EUC-TW": CNS 11643 planes 1-7 plus
;; both Big5 charsets, with `euc-tw' listed first among the coding
;; systems and in the coding priority.  It is filed under the "Chinese"
;; parent group.
(set-language-info-alist
 "Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2
                             chinese-cns11643-3 chinese-cns11643-4
                             chinese-cns11643-5 chinese-cns11643-6
                             chinese-cns11643-7 chinese-big5-1 chinese-big5-2)
                    (coding-system euc-tw iso-2022-cn)
                    (coding-priority euc-tw chinese-big5 iso-2022-cn
                                     chinese-iso-8bit)
                    (features china-util)
                    (input-method . "chinese-cns-quick")
                    ;; User-visible description; spelling of
                    ;; "preferring" corrected.
                    (documentation . "\
Support for Chinese, preferring the EUC-TW character set. Note that
the EUC-TW coding system accepts Big5 for input also (which is then
converted to CNS)."))
 '("Chinese"))
351
41da80b1
DL
352(provide 'chinese)
353
ab5796a9 354;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f
4ed46869 355;;; chinese.el ends here