Add arch taglines
[bpt/emacs.git] / lisp / language / chinese.el
CommitLineData
60370d40 1;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*-
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
4ed46869
KH
5
6;; Keywords: multilingual, Chinese
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869
KH
24
25;;; Commentary:
26
27;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are
28;; supported.
29
30;;; Code:
31
32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33;;; Chinese (general)
34;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35
;; Define the coding system `iso-2022-cn' (mnemonic ?C), advertised as
;; MIME charset `iso-2022-cn'.  Its safe charsets are ASCII, GB2312 and
;; CNS 11643 planes 1 and 2.
;; NOTE(review): the quoted FLAGS list presumably gives the four
;; graphic-register designations followed by the ISO 2022 option flags
;; in the order documented by `make-coding-system' -- confirm there.
36(make-coding-system
4138c943 37 'iso-2022-cn 2 ?C
285aac85 38 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)."
4138c943
KH
39 '(ascii
40 (nil chinese-gb2312 chinese-cns11643-1)
41 (nil chinese-cns11643-2)
42 nil
43 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
44 init-bol)
45 '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2)
46 (mime-charset . iso-2022-cn)))
47
;; `chinese-iso-7bit' is a second name for `iso-2022-cn'.
48(define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn)
49
;; Define the coding system `iso-2022-cn-ext' (mnemonic ?C), advertised
;; as MIME charset `iso-2022-cn-ext'.  Compared with `iso-2022-cn' it
;; additionally lists CNS 11643 planes 3 through 7, both in the fourth
;; charset slot of the FLAGS list and in `safe-charsets'.
50(make-coding-system
51 'iso-2022-cn-ext 2 ?C
285aac85 52 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)."
4ed46869
KH
53 '(ascii
54 (nil chinese-gb2312 chinese-cns11643-1)
55 (nil chinese-cns11643-2)
56 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
57 chinese-cns11643-6 chinese-cns11643-7)
f3f18123 58 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
a18aa841 59 init-bol)
4138c943
KH
60 '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2
61 chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
62 chinese-cns11643-6 chinese-cns11643-7)
63 (mime-charset . iso-2022-cn-ext)))
64
335a7ad7 65\f
4ed46869 66;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
a1506d29 67;;; Chinese GB2312 (simplified)
4ed46869
KH
68;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69
;; Define the coding system `chinese-iso-8bit' (mnemonic ?c), the EUC
;; encoding of GB2312 advertised as MIME charset `gb2312'.  Only ASCII
;; and `chinese-gb2312' are listed as safe charsets.
70(make-coding-system
4b9121fc 71 'chinese-iso-8bit 2 ?c
285aac85 72 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)."
4138c943
KH
73 '(ascii chinese-gb2312 nil nil
74 nil ascii-eol ascii-cntl nil nil nil nil)
75 '((safe-charsets ascii chinese-gb2312)
ff890e66 76 (mime-charset . gb2312)))
4ed46869 77
71eabd24
RS
;; Alternative names that all resolve to `chinese-iso-8bit'.
78(define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit)
79(define-coding-system-alias 'euc-china 'chinese-iso-8bit)
a18aa841 80(define-coding-system-alias 'euc-cn 'chinese-iso-8bit)
4f35555a
KH
81(define-coding-system-alias 'cn-gb 'chinese-iso-8bit)
82(define-coding-system-alias 'gb2312 'chinese-iso-8bit)
f3f18123 83
;; Define the coding system `chinese-hz' (mnemonic ?z), advertised as
;; MIME charset `hz-gb-2312'.  The HZ/ZW conversion itself is delegated
;; to the functions named in the property list: `post-read-decode-hz'
;; after reading and `pre-write-encode-hz' before writing.
;; NOTE(review): TYPE 0 with nil FLAGS presumably selects Emacs'
;; internal encoding as the base -- confirm in `make-coding-system'.
4ed46869 84(make-coding-system
4b9121fc 85 'chinese-hz 0 ?z
285aac85 86 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)."
a18aa841 87 nil
4138c943
KH
88 '((safe-charsets ascii chinese-gb2312)
89 (mime-charset . hz-gb-2312)
90 (post-read-conversion . post-read-decode-hz)
91 (pre-write-conversion . pre-write-encode-hz)))
f3f18123 92
71eabd24
RS
;; Alternative names that resolve to `chinese-hz'.
93(define-coding-system-alias 'hz-gb-2312 'chinese-hz)
94(define-coding-system-alias 'hz 'chinese-hz)
4ed46869
KH
95
(defun post-read-decode-hz (len)
  "Decode the LEN characters following point from HZ/ZW encoding.
This is the `post-read-conversion' function of the `chinese-hz'
coding system.  The text is decoded in place with `decode-hz-region'
and the value of that call is returned.  The buffer's modified flag
is put back to what it was before decoding, and
`last-coding-system-used' is bound locally so the caller's value
is left untouched."
  (let ((start (point))
        (was-modified (buffer-modified-p))
        (last-coding-system-used nil))
    (let ((result (decode-hz-region start (+ start len))))
      (set-buffer-modified-p was-modified)
      result)))
4ed46869
KH
103
(defun pre-write-encode-hz (from to)
  "Prepare text for writing in HZ/ZW encoding.
This is the `pre-write-conversion' function of the `chinese-hz'
coding system.  FROM is either a string, in which case TO is
ignored, or the start of a region ending at TO in the current
buffer.  The text is copied into a freshly generated buffer named
\" *temp*\", encoded there with `encode-hz-region', and that
buffer is left current on return.  Always returns nil."
  (let ((source (current-buffer))
        (work (generate-new-buffer " *temp*")))
    ;; Deliberately no `save-excursion': the work buffer stays current.
    (set-buffer work)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source from to)))
    ;; Keep the caller's `last-coding-system-used' from being clobbered.
    (let ((last-coding-system-used nil))
      (encode-hz-region 1 (point-max)))
    nil))
113
;; Register the language environment "Chinese-GB" under the "Chinese"
;; parent group.  The alist names its charsets, the coding systems it
;; offers and their detection priority, the input method
;; "chinese-py-punct", the feature `china-util', a sample text and
;; a documentation string.
;; The sample text is written with ISO 2022 escape sequences (this file
;; is tagged `coding: iso-2022-7bit'); do not re-encode it.
4ed46869 114(set-language-info-alist
a564ccf9 115 "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng)
4138c943
KH
116 (coding-system chinese-iso-8bit iso-2022-cn chinese-hz)
117 (coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn)
a564ccf9
KH
118 (input-method . "chinese-py-punct")
119 (features china-util)
fab8252e 120 (sample-text . "Chinese (\e$AVPND\e(B,\e$AFUM(;0\e(B,\e$A::So\e(B) \e$ADc:C\e(B")
4138c943
KH
121 (documentation . "Support for Chinese GB2312 character set."))
122 '("Chinese"))
4ed46869
KH
123
124;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
125;;; Chinese BIG5 (traditional)
126;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
127
;; Define the coding system `chinese-big5' (mnemonic ?B), advertised as
;; MIME charset `big5'.  Safe charsets are ASCII and the two Emacs Big5
;; charsets; `charset-origin-alist' associates both of those charsets
;; with the external name "BIG5" and the function `encode-big5-char'.
;; NOTE(review): TYPE 3 is presumably the built-in Big5 coding type of
;; `make-coding-system' -- confirm in its documentation.
128(make-coding-system
285aac85
WL
129 'chinese-big5 3 ?B
130 "BIG5 8-bit encoding for Chinese (MIME:Big5)."
4138c943
KH
131 nil
132 '((safe-charsets ascii chinese-big5-1 chinese-big5-2)
ff890e66 133 (mime-charset . big5)
d4c97509
KH
134 (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char)
135 (chinese-big5-2 "BIG5" encode-big5-char))))
4ed46869 136
71eabd24
RS
;; Alternative names that resolve to `chinese-big5'.
137(define-coding-system-alias 'big5 'chinese-big5)
138(define-coding-system-alias 'cn-big5 'chinese-big5)
f3f18123 139
4ed46869
KH
140;; Big5 font requires special encoding.
;; CCL program mapping an Emacs Big5 character (charset id in R0,
;; position codes in R1/R2) to the two-byte code point of a Big5 font.
;; Steps, as performed by the statements below:
;;   1. Flatten the position codes into a linear index
;;      (R1 - 0x21) * 94 + (R2 - 0x21).
;;   2. For `chinese-big5-2', add 6280 to that index
;;      (NOTE(review): presumably the number of positions that precede
;;      it in `chinese-big5-1' -- confirm).
;;   3. First byte  = index / 157 + 0xA1.
;;   4. Second byte = index % 157, shifted by 0x40 when below 0x3F
;;      (giving 0x40..0x7E) and by 0x62 otherwise (giving 0xA1..0xFE).
141(define-ccl-program ccl-encode-big5-font
142 `(0
143 ;; In: R0:chinese-big5-1 or chinese-big5-2
144 ;; R1:position code 1
145 ;; R2:position code 2
146 ;; Out: R1:font code point 1
147 ;; R2:font code point 2
148 ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21))
149 (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
150 (r1 = ((r2 / 157) + ?\xA1))
151 (r2 %= 157)
152 (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62))))
153 "CCL program to encode a Big5 code to code point of Big5 font.")
154
;; Put the pair ("big5" . ccl-encode-big5-font) at the front of
;; `font-ccl-encoder-alist'.
155(setq font-ccl-encoder-alist
156 (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist))
157
;; Register the language environment "Chinese-BIG5" under the "Chinese"
;; parent group.  The alist names the two Big5 charsets, the coding
;; systems offered and their detection priority, the input method
;; "chinese-py-punct-b5", the feature `china-util', a sample text and
;; a documentation string.
;; The sample text is written with ISO 2022 escape sequences (this file
;; is tagged `coding: iso-2022-7bit'); do not re-encode it.
4ed46869 158(set-language-info-alist
a564ccf9 159 "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2)
4138c943
KH
160 (coding-system chinese-big5 chinese-iso-7bit)
161 (coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit)
a564ccf9
KH
162 (input-method . "chinese-py-punct-b5")
163 (features china-util)
fab8252e 164 (sample-text . "Cantonese (\e$(0GnM$\e(B,\e$(0N]0*Hd\e(B) \e$(0*/=(\e(B, \e$(0+$)p\e(B")
4138c943
KH
165 (documentation . "Support for Chinese Big5 character set."))
166 '("Chinese"))
4ed46869
KH
167
168;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
169;;; Chinese CNS11643 (traditional)
170;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
171
285aac85
WL
;; `big5-to-cns' starts out as an empty translation table here; the
;; EUC-TW encoder below refers to it by name.  The table is registered
;; under the symbol `big5-to-cns' only if that symbol has no
;; `translation-table' property yet, so a registration already made by
;; china-util is not overwritten.
172(defvar big5-to-cns (make-translation-table)
173 "Translation table for encoding to `euc-tw'.")
174;; Could have been done by china-util loaded before.
175(unless (get 'big5-to-cns 'translation-table)
176 (define-translation-table 'big5-to-cns big5-to-cns))
177
;; CCL decoder for EUC-TW.  Summary of the byte-level logic below:
;;   - A byte below 0x80 is written out unchanged.
;;   - 0x8E starts a four-byte sequence: 0x8E, a plane byte in
;;     0xA1..0xA7 selecting CNS plane 1..7, then two bytes that must
;;     each be >= 0xA1.
;;   - Any other byte >= 0xA1 followed by a byte >= 0xA1 is a two-byte
;;     CNS plane 1 character.
;;   - Whenever a byte fails its check, the bytes read so far for that
;;     sequence are written out unchanged and decoding continues.
;; The two code bytes are combined as ((b1 - 0x80) << 7) + (b2 - 0x80)
;; before being passed to `write-multibyte-character'.
;; R4..R6 cache the charset ids of planes 1..3; planes 4..7 are spliced
;; in as constants because no registers are left for them.
178(define-ccl-program ccl-decode-euc-tw
179 ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding;
180 ;; CNS planes 2 to 7 always need four bytes. In internal encoding of
181 ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need
182 ;; four bytes. Thus a buffer magnification value of 2 (for both
183 ;; encoding and decoding) is sufficient.
184 `(2
185 ;; we don't have enough registers to hold all charset-ids
186 ((r4 = ,(charset-id 'chinese-cns11643-1))
187 (r5 = ,(charset-id 'chinese-cns11643-2))
188 (r6 = ,(charset-id 'chinese-cns11643-3))
189 (loop
190 (read-if (r0 < #x80)
191 ;; ASCII
192 (write-repeat r0)
193 ;; not ASCII
194 (if (r0 == #x8E)
195 ;; single shift
196 (read-if (r1 < #xA1)
197 ;; invalid byte
198 ((write r0)
199 (write-repeat r1))
200 (if (r1 > #xA7)
201 ;; invalid plane
202 ((write r0)
203 (write-repeat r1))
204 ;; OK, we have a plane
205 (read-if (r2 < #xA1)
206 ;; invalid first byte
207 ((write r0 r1)
208 (write-repeat r2))
209 (read-if (r3 < #xA1)
210 ;; invalid second byte
211 ((write r0 r1 r2)
212 (write-repeat r3))
213 ;; CNS 1-7, finally
214 ((branch (r1 - #xA1)
215 (r1 = r4)
216 (r1 = r5)
217 (r1 = r6)
218 (r1 = ,(charset-id 'chinese-cns11643-4))
219 (r1 = ,(charset-id 'chinese-cns11643-5))
220 (r1 = ,(charset-id 'chinese-cns11643-6))
221 (r1 = ,(charset-id 'chinese-cns11643-7)))
222 (r2 = ((((r2 - #x80) << 7) + r3) - #x80))
223 (write-multibyte-character r1 r2)
224 (repeat))))))
225 ;; standard EUC
226 (if (r0 < #xA1)
227 ;; invalid first byte
228 (write-repeat r0)
229 (read-if (r1 < #xA1)
230 ;; invalid second byte
231 ((write r0)
232 (write-repeat r1))
233 ;; CNS 1, finally
234 ((r1 = ((((r0 - #x80) << 7) + r1) - #x80))
235 (write-multibyte-character r4 r1)
236 (repeat)))))))))
237 "CCL program to decode EUC-TW encoding."
238)
239
;; CCL encoder for EUC-TW.  For each character read (charset id in R0,
;; code in R1):
;;   - An ASCII character's code is written out directly.
;;   - A character of `chinese-big5-1' or `chinese-big5-2' is first run
;;     through the translation table `big5-to-cns'.
;;   - The charset id is then replaced by a plane byte 0xA1..0xA7 for
;;     CNS planes 1..7; a character of any other charset is emitted as
;;     the single byte 0xFF.
;;   - Plane 1 is written as two bytes; other planes are prefixed with
;;     0x8E and the plane byte.  The two code bytes are
;;     (R1 >> 7) + 0x80 and (R1 % 0x80) + 0x80.
;; R2..R6 cache the charset ids tested most often; the remaining CNS
;; plane ids are spliced in as constants.
240(define-ccl-program ccl-encode-euc-tw
241 `(2
242 ;; we don't have enough registers to hold all charset-ids
243 ((r2 = ,(charset-id 'ascii))
244 (r3 = ,(charset-id 'chinese-big5-1))
245 (r4 = ,(charset-id 'chinese-big5-2))
246 (r5 = ,(charset-id 'chinese-cns11643-1))
247 (r6 = ,(charset-id 'chinese-cns11643-2))
248 (loop
249 (read-multibyte-character r0 r1)
250 (if (r0 == r2)
251 (write-repeat r1)
252 (;; Big 5 encoded characters are first translated to CNS
253 (if (r0 == r3)
254 (translate-character big5-to-cns r0 r1)
255 (if (r0 == r4)
256 (translate-character big5-to-cns r0 r1)))
257 (if (r0 == r5)
258 (r0 = #xA1)
259 (if (r0 == r6)
260 (r0 = #xA2)
261 (if (r0 == ,(charset-id 'chinese-cns11643-3))
262 (r0 = #xA3)
263 (if (r0 == ,(charset-id 'chinese-cns11643-4))
264 (r0 = #xA4)
265 (if (r0 == ,(charset-id 'chinese-cns11643-5))
266 (r0 = #xA5)
267 (if (r0 == ,(charset-id 'chinese-cns11643-6))
268 (r0 = #xA6)
269 (if (r0 == ,(charset-id 'chinese-cns11643-7))
270 (r0 = #xA7)
271 ;; not CNS. We use a dummy character which
272 ;; can't occur in EUC-TW encoding to indicate
273 ;; this.
274 (write-repeat #xFF))))))))))
275 (if (r0 != #xA1)
276 ;; single shift and CNS plane
277 ((write #x8E)
278 (write r0)))
279 (write ((r1 >> 7) + #x80))
280 (write ((r1 % #x80) + #x80))
281 (repeat))))
282 "CCL program to encode EUC-TW encoding."
283)
284
(defun euc-tw-pre-write-conversion (beg end)
  "One-shot pre-write hook for `euc-tw' whose only job is to load china-util.
BEG and END are accepted because pre-write functions are called with
them, but they are ignored; the text itself is not touched.  After
`china-util' has been required, the `pre-write-conversion' property
of `euc-tw' is cleared so this function is not called again.
Always returns nil."
  (prog1 nil
    ;; Loading china-util makes sure the translation table is available.
    (require 'china-util)
    ;; Unhook ourselves: one load is enough.
    (coding-system-put 'euc-tw 'pre-write-conversion nil)))
292
;; Define the coding system `euc-tw' (mnemonic ?Z).  Decoding and
;; encoding are done by the CCL programs `ccl-decode-euc-tw' and
;; `ccl-encode-euc-tw' given as the FLAGS pair.  The Big5 charsets are
;; listed as safe in addition to ASCII and CNS planes 1..7, every byte
;; value 0..255 is declared valid, and `euc-tw-pre-write-conversion'
;; is installed as the pre-write function.
;; NOTE(review): TYPE 4 is presumably the CCL-based coding type of
;; `make-coding-system' -- confirm in its documentation.
293(make-coding-system
294 'euc-tw 4 ?Z
295 "ISO 2022 based EUC encoding for Chinese CNS11643.
296Big5 encoding is accepted for input also (which is then converted to CNS)."
297 '(ccl-decode-euc-tw . ccl-encode-euc-tw)
298 '((safe-charsets ascii
299 chinese-big5-1
300 chinese-big5-2
301 chinese-cns11643-1
302 chinese-cns11643-2
303 chinese-cns11643-3
304 chinese-cns11643-4
305 chinese-cns11643-5
306 chinese-cns11643-6
307 chinese-cns11643-7)
308 (valid-codes (0 . 255))
309 (pre-write-conversion . euc-tw-pre-write-conversion)))
310
;; `euc-taiwan' is a second name for `euc-tw'.
311(define-coding-system-alias 'euc-taiwan 'euc-tw)
312
;; Register the language environment "Chinese-CNS" under the "Chinese"
;; parent group.  The alist names CNS 11643 planes 1..7 as its
;; charsets, offers `iso-2022-cn' and `euc-tw' (with `iso-2022-cn'
;; first in the detection priority), requires the feature `china-util'
;; and selects the input method "chinese-cns-quick".
4ed46869 313(set-language-info-alist
a564ccf9 314 "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2
4138c943
KH
315 chinese-cns11643-3 chinese-cns11643-4
316 chinese-cns11643-5 chinese-cns11643-6
317 chinese-cns11643-7)
285aac85
WL
318 (coding-system iso-2022-cn euc-tw)
319 (coding-priority iso-2022-cn euc-tw chinese-big5
320 chinese-iso-8bit)
a564ccf9
KH
321 (features china-util)
322 (input-method . "chinese-cns-quick")
285aac85 323 (documentation . "\
9da20928 324Support for Chinese CNS character sets. Note that the EUC-TW coding system
285aac85 325accepts Big5 for input also (which is then converted to CNS)."))
4138c943 326 '("Chinese"))
4ed46869 327
9da20928
DL
;; Register the language environment "Chinese-EUC-TW" under the
;; "Chinese" parent group.  It lists the same CNS 11643 planes as
;; "Chinese-CNS" plus the two Big5 charsets, and puts `euc-tw' first
;; both in the offered coding systems and in the detection priority.
;; The documentation string is shown to users, so its spelling matters.
(set-language-info-alist
 "Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2
                             chinese-cns11643-3 chinese-cns11643-4
                             chinese-cns11643-5 chinese-cns11643-6
                             chinese-cns11643-7 chinese-big5-1 chinese-big5-2)
                    (coding-system euc-tw iso-2022-cn)
                    (coding-priority euc-tw chinese-big5 iso-2022-cn
                                     chinese-iso-8bit)
                    (features china-util)
                    (input-method . "chinese-cns-quick")
                    (documentation . "\
Support for Chinese, preferring the EUC-TW character set. Note that
the EUC-TW coding system accepts Big5 for input also (which is then
converted to CNS)."))
 '("Chinese"))
343
41da80b1
DL
344(provide 'chinese)
345
ab5796a9 346;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f
4ed46869 347;;; chinese.el ends here