Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
d4877ac1 GM |
3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 |
4 | ;; Free Software Foundation, Inc. | |
7976eda0 KH |
5 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
6 | ;; 2005, 2006 | |
eaa61218 KH |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
8 | ;; Registration Number H14PRO021 | |
4ed46869 KH |
9 | |
10 | ;; Keywords: multilingual, Chinese | |
11 | ||
12 | ;; This file is part of GNU Emacs. | |
13 | ||
14 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
15 | ;; it under the terms of the GNU General Public License as published by | |
16 | ;; the Free Software Foundation; either version 2, or (at your option) | |
17 | ;; any later version. | |
18 | ||
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
369314dc | 25 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
3a35cf56 LK |
26 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
27 | ;; Boston, MA 02110-1301, USA. | |
4ed46869 KH |
28 | |
29 | ;;; Commentary: | |
30 | ||
31 | ;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are | |
32 | ;; supported. | |
33 | ||
34 | ;;; Code: | |
35 | ||
36 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
37 | ;;; Chinese (general) | |
38 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
39 | ||
;; Define the `iso-2022-cn' coding system: type 2, mnemonic character ?C.
;; The first quoted list is the flags argument.  Its first four elements
;; name charsets: ASCII alone, then GB2312 together with CNS plane 1, then
;; CNS plane 2, then nil.  The remaining elements are positional flags.
;; NOTE(review): per the `make-coding-system' docstring the four charset
;; slots are the G0..G3 designations and a leading nil in a slot means
;; "not designated initially" -- confirm against that docstring.
;; The second quoted list sets the `safe-charsets' and `mime-charset'
;; properties.
40 | (make-coding-system | |
4138c943 | 41 | 'iso-2022-cn 2 ?C |
285aac85 | 42 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)." |
4138c943 KH |
43 | '(ascii |
44 | (nil chinese-gb2312 chinese-cns11643-1) | |
45 | (nil chinese-cns11643-2) | |
46 | nil | |
47 | nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | |
48 | init-bol) | |
49 | '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2) | |
50 | (mime-charset . iso-2022-cn))) | |
51 | ||
;; `chinese-iso-7bit' is a second name for the coding system defined above.
52 | (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) | |
53 | ||
;; Define `iso-2022-cn-ext' (type 2, mnemonic ?C).  The flags list matches
;; the one used for `iso-2022-cn' except that the fourth charset slot lists
;; CNS planes 3 to 7 instead of being nil; `safe-charsets' is extended with
;; the same five planes.
54 | (make-coding-system | |
55 | 'iso-2022-cn-ext 2 ?C | |
285aac85 | 56 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)." |
4ed46869 KH |
57 | '(ascii |
58 | (nil chinese-gb2312 chinese-cns11643-1) | |
59 | (nil chinese-cns11643-2) | |
60 | (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
61 | chinese-cns11643-6 chinese-cns11643-7) | |
f3f18123 | 62 | nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil |
a18aa841 | 63 | init-bol) |
4138c943 KH |
64 | '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2 |
65 | chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
66 | chinese-cns11643-6 chinese-cns11643-7) | |
67 | (mime-charset . iso-2022-cn-ext))) | |
68 | ||
335a7ad7 | 69 | \f |
4ed46869 | 70 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
a1506d29 | 71 | ;;; Chinese GB2312 (simplified) |
4ed46869 KH |
72 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
73 | ||
;; Define `chinese-iso-8bit' (type 2, mnemonic ?c).  The flags list names
;; ASCII and GB2312 in its first two charset slots and leaves the other two
;; nil; unlike the 7-bit systems above, the `seven', `locking-shift' and
;; `single-shift' positions are nil here.  Its MIME charset is `gb2312'.
74 | (make-coding-system | |
4b9121fc | 75 | 'chinese-iso-8bit 2 ?c |
285aac85 | 76 | "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)." |
4138c943 KH |
77 | '(ascii chinese-gb2312 nil nil |
78 | nil ascii-eol ascii-cntl nil nil nil nil) | |
79 | '((safe-charsets ascii chinese-gb2312) | |
ff890e66 | 80 | (mime-charset . gb2312))) |
4ed46869 | 81 | |
71eabd24 RS |
;; Six alternative names, all resolving to `chinese-iso-8bit'.
82 | (define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit) |
83 | (define-coding-system-alias 'euc-china 'chinese-iso-8bit) | |
a18aa841 | 84 | (define-coding-system-alias 'euc-cn 'chinese-iso-8bit) |
4f35555a KH |
85 | (define-coding-system-alias 'cn-gb 'chinese-iso-8bit) |
86 | (define-coding-system-alias 'gb2312 'chinese-iso-8bit) | |
6eac8f52 | 87 | (define-coding-system-alias 'cp936 'chinese-iso-8bit) |
f3f18123 | 88 | |
;; Define `chinese-hz' (type 0, mnemonic ?z) with a nil flags argument.
;; The HZ conversion itself is attached through the properties list:
;; `post-read-decode-hz' runs after reading and `pre-write-encode-hz'
;; before writing.
4ed46869 | 89 | (make-coding-system |
4b9121fc | 90 | 'chinese-hz 0 ?z |
285aac85 | 91 | "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)." |
a18aa841 | 92 | nil |
4138c943 KH |
93 | '((safe-charsets ascii chinese-gb2312) |
94 | (mime-charset . hz-gb-2312) | |
95 | (post-read-conversion . post-read-decode-hz) | |
96 | (pre-write-conversion . pre-write-encode-hz))) | |
f3f18123 | 97 | |
71eabd24 RS |
;; `hz-gb-2312' and `hz' are alternative names for `chinese-hz'.
98 | (define-coding-system-alias 'hz-gb-2312 'chinese-hz) |
99 | (define-coding-system-alias 'hz 'chinese-hz) | |
4ed46869 KH |
100 | |
(defun post-read-decode-hz (len)
  "Decode the LEN characters after point from HZ with `decode-hz-region'.
Return whatever `decode-hz-region' returns.  The buffer's modified flag
is put back to the state it had on entry, and the caller's value of
`last-coding-system-used' is shielded from the decoding."
  (let* ((start (point))
         (was-modified (buffer-modified-p))
         (last-coding-system-used nil))
    (prog1 (decode-hz-region start (+ start len))
      (set-buffer-modified-p was-modified))))
4ed46869 KH |
108 | |
(defun pre-write-encode-hz (from to)
  "Leave a temporary buffer current that holds the HZ-encoded text.
FROM is either a string to encode, or the start of a region in the
buffer that is current on entry, in which case TO is the region's end.
The text is copied into a newly generated \" *temp*\" buffer, which is
made current and encoded in place with `encode-hz-region'.  The
original buffer is deliberately not made current again.  Return nil."
  (let ((source (current-buffer))
        (work (generate-new-buffer " *temp*")))
    (set-buffer work)
    (if (not (stringp from))
        (insert-buffer-substring source from to)
      (insert from))
    ;; Keep the caller's `last-coding-system-used' untouched.
    (let ((last-coding-system-used nil))
      (encode-hz-region 1 (point-max))))
  nil)
118 | ||
;; Register the "Chinese-GB" language environment.  The alist supplies its
;; charsets, its coding systems and their detection priority, the default
;; input method, the feature to load (`china-util'), a sample text, a
;; documentation string and a tutorial file name.  The final argument lists
;; "Chinese" as the parent group.
4ed46869 | 119 | (set-language-info-alist |
a564ccf9 | 120 | "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) |
4138c943 KH |
121 | (coding-system chinese-iso-8bit iso-2022-cn chinese-hz) |
122 | (coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn) | |
a564ccf9 KH |
123 | (input-method . "chinese-py-punct") |
124 | (features china-util) | |
fab8252e | 125 | (sample-text . "Chinese (\e$AVPND\e(B,\e$AFUM(;0\e(B,\e$A::So\e(B) \e$ADc:C\e(B") |
04c00c85 EZ |
126 | (documentation . "Support for Chinese GB2312 character set.") |
127 | (tutorial . "TUTORIAL.cn")) | |
4138c943 | 128 | '("Chinese")) |
4ed46869 KH |
129 | |
130 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
131 | ;; Chinese BIG5 (traditional) | |
132 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
133 | ||
;; Define `chinese-big5' (type 3, mnemonic ?B) with a nil flags argument.
;; The properties declare the two Big5 charsets as safe, set the MIME
;; charset to `big5', and give a `charset-origin-alist' that maps both
;; charsets to the external name "BIG5" with `encode-big5-char' as the
;; associated function.
134 | (make-coding-system | |
285aac85 WL |
135 | 'chinese-big5 3 ?B |
136 | "BIG5 8-bit encoding for Chinese (MIME:Big5)." | |
4138c943 KH |
137 | nil |
138 | '((safe-charsets ascii chinese-big5-1 chinese-big5-2) | |
ff890e66 | 139 | (mime-charset . big5) |
d4c97509 KH |
140 | (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) |
141 | (chinese-big5-2 "BIG5" encode-big5-char)))) | |
4ed46869 | 142 | |
71eabd24 RS |
;; `big5', `cn-big5' and `cp950' are alternative names for `chinese-big5'.
143 | (define-coding-system-alias 'big5 'chinese-big5) |
144 | (define-coding-system-alias 'cn-big5 'chinese-big5) | |
6eac8f52 | 145 | (define-coding-system-alias 'cp950 'chinese-big5) |
f3f18123 | 146 | |
4ed46869 KH |
147 | ;; Big5 font requires special encoding. |
;; Convert a character of chinese-big5-1 or chinese-big5-2, given as two
;; position codes, into the two-byte code point used by a Big5 font.
148 | (define-ccl-program ccl-encode-big5-font | |
149 | `(0 | |
150 | ;; In: R0:chinese-big5-1 or chinese-big5-2 | |
151 | ;; R1:position code 1 | |
152 | ;; R2:position code 2 | |
153 | ;; Out: R1:font code point 1 | |
154 | ;; R2:font code point 2 | |
;; Flatten the two position codes (each offset by #x21, 94 per row) into
;; one zero-based index held in r2.
155 | ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21)) | |
;; Characters of chinese-big5-2 are shifted by 6280 (= 40 * 157) indices.
156 | (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280)) | |
;; First byte: one value per 157 indices, starting at #xA1.
157 | (r1 = ((r2 / 157) + ?\xA1)) | |
158 | (r2 %= 157) | |
;; Second byte: remainders 0..62 become #x40..#x7E, 63..156 become
;; #xA1..#xFE.
159 | (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62)))) | |
160 | "CCL program to encode a Big5 code to code point of Big5 font.") | |
161 | ||
;; Prepend a ("big5" . PROGRAM) entry to `font-ccl-encoder-alist'.
162 | (setq font-ccl-encoder-alist | |
163 | (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)) | |
164 | ||
;; Register the "Chinese-BIG5" language environment: the two Big5 charsets,
;; its coding systems and their detection priority, the default input
;; method, the feature to load (`china-util'), a sample text, a
;; documentation string and a tutorial file name.  The final argument lists
;; "Chinese" as the parent group.
4ed46869 | 165 | (set-language-info-alist |
a564ccf9 | 166 | "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) |
4138c943 KH |
167 | (coding-system chinese-big5 chinese-iso-7bit) |
168 | (coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit) | |
a564ccf9 KH |
169 | (input-method . "chinese-py-punct-b5") |
170 | (features china-util) | |
fab8252e | 171 | (sample-text . "Cantonese (\e$(0GnM$\e(B,\e$(0N]0*Hd\e(B) \e$(0*/=(\e(B, \e$(0+$)p\e(B") |
04c00c85 EZ |
172 | (documentation . "Support for Chinese Big5 character set.") |
173 | (tutorial . "TUTORIAL.zh")) | |
4138c943 | 174 | '("Chinese")) |
4ed46869 KH |
175 | |
176 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
177 | ;; Chinese CNS11643 (traditional) | |
178 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
179 | ||
285aac85 WL |
;; The variable starts out as a freshly made translation table.
;; NOTE(review): presumably china-util fills in the actual Big5-to-CNS
;; mappings -- confirm in that library.
180 | (defvar big5-to-cns (make-translation-table) |
181 | "Translation table for encoding to `euc-tw'.") | |
182 | ;; Register the table under its symbol unless that was already done (e.g. by china-util having been loaded before). | |
183 | (unless (get 'big5-to-cns 'translation-table) | |
184 | (define-translation-table 'big5-to-cns big5-to-cns)) | |
185 | ||
;; Decoder half of the `euc-tw' coding system.  Register use, as set up in
;; the program below:
;;   r0      lead byte just read
;;   r1..r3  following bytes; r1 (and r2 on the four-byte path) is reused
;;           for the charset id / combined code handed to the writer
;;   r4..r6  charset ids of CNS planes 1 to 3
;; Whenever a byte fails a range check, the bytes read so far are written
;; out as they are and the loop restarts.
186 | (define-ccl-program ccl-decode-euc-tw | |
187 | ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding; | |
188 | ;; CNS planes 2 to 7 always need four bytes. In internal encoding of | |
189 | ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need | |
190 | ;; four bytes. Thus a buffer magnification value of 2 (for both | |
191 | ;; encoding and decoding) is sufficient. | |
192 | `(2 | |
193 | ;; only planes 1-3 are cached in registers; planes 4-7 are spliced in as constants in the branch below | |
194 | ((r4 = ,(charset-id 'chinese-cns11643-1)) | |
195 | (r5 = ,(charset-id 'chinese-cns11643-2)) | |
196 | (r6 = ,(charset-id 'chinese-cns11643-3)) | |
197 | (loop | |
198 | (read-if (r0 < #x80) | |
199 | ;; ASCII | |
200 | (write-repeat r0) | |
201 | ;; not ASCII | |
202 | (if (r0 == #x8E) | |
203 | ;; single shift | |
204 | (read-if (r1 < #xA1) | |
205 | ;; invalid byte | |
206 | ((write r0) | |
207 | (write-repeat r1)) | |
208 | (if (r1 > #xA7) | |
209 | ;; invalid plane | |
210 | ((write r0) | |
211 | (write-repeat r1)) | |
212 | ;; OK, we have a plane | |
213 | (read-if (r2 < #xA1) | |
214 | ;; invalid first byte | |
215 | ((write r0 r1) | |
216 | (write-repeat r2)) | |
217 | (read-if (r3 < #xA1) | |
218 | ;; invalid second byte | |
219 | ((write r0 r1 r2) | |
220 | (write-repeat r3)) | |
221 | ;; CNS 1-7, finally | |
;; The plane byte #xA1..#xA7 selects the charset id: branch index 0..6.
222 | ((branch (r1 - #xA1) | |
223 | (r1 = r4) | |
224 | (r1 = r5) | |
225 | (r1 = r6) | |
226 | (r1 = ,(charset-id 'chinese-cns11643-4)) | |
227 | (r1 = ,(charset-id 'chinese-cns11643-5)) | |
228 | (r1 = ,(charset-id 'chinese-cns11643-6)) | |
229 | (r1 = ,(charset-id 'chinese-cns11643-7))) | |
;; Drop #x80 from each code byte and pack the two 7-bit values into r2.
230 | (r2 = ((((r2 - #x80) << 7) + r3) - #x80)) | |
231 | (write-multibyte-character r1 r2) | |
232 | (repeat)))))) | |
233 | ;; standard EUC | |
234 | (if (r0 < #xA1) | |
235 | ;; invalid first byte | |
236 | (write-repeat r0) | |
237 | (read-if (r1 < #xA1) | |
238 | ;; invalid second byte | |
239 | ((write r0) | |
240 | (write-repeat r1)) | |
241 | ;; CNS 1, finally | |
242 | ((r1 = ((((r0 - #x80) << 7) + r1) - #x80)) | |
243 | (write-multibyte-character r4 r1) | |
244 | (repeat))))))))) | |
245 | "CCL program to decode EUC-TW encoding." | |
246 | ) | |
247 | ||
;; Encoder half of the `euc-tw' coding system.  r2..r6 cache the charset
;; ids of ASCII, the two Big5 charsets and CNS planes 1 and 2; r0 and r1
;; receive the charset and code of each character read.  After the chain
;; of charset tests r0 holds the plane byte (#xA1..#xA7).  Plane 1 is
;; written as two bytes; any other plane is prefixed with #x8E and the
;; plane byte.
248 | (define-ccl-program ccl-encode-euc-tw | |
249 | `(2 | |
250 | ;; only five charset ids are cached in registers; CNS planes 3-7 are spliced in as constants below | |
251 | ((r2 = ,(charset-id 'ascii)) | |
252 | (r3 = ,(charset-id 'chinese-big5-1)) | |
253 | (r4 = ,(charset-id 'chinese-big5-2)) | |
254 | (r5 = ,(charset-id 'chinese-cns11643-1)) | |
255 | (r6 = ,(charset-id 'chinese-cns11643-2)) | |
256 | (loop | |
257 | (read-multibyte-character r0 r1) | |
258 | (if (r0 == r2) | |
259 | (write-repeat r1) | |
260 | (;; Big 5 encoded characters are first translated to CNS | |
261 | (if (r0 == r3) | |
262 | (translate-character big5-to-cns r0 r1) | |
263 | (if (r0 == r4) | |
264 | (translate-character big5-to-cns r0 r1))) | |
265 | (if (r0 == r5) | |
266 | (r0 = #xA1) | |
267 | (if (r0 == r6) | |
268 | (r0 = #xA2) | |
269 | (if (r0 == ,(charset-id 'chinese-cns11643-3)) | |
270 | (r0 = #xA3) | |
271 | (if (r0 == ,(charset-id 'chinese-cns11643-4)) | |
272 | (r0 = #xA4) | |
273 | (if (r0 == ,(charset-id 'chinese-cns11643-5)) | |
274 | (r0 = #xA5) | |
275 | (if (r0 == ,(charset-id 'chinese-cns11643-6)) | |
276 | (r0 = #xA6) | |
277 | (if (r0 == ,(charset-id 'chinese-cns11643-7)) | |
278 | (r0 = #xA7) | |
279 | ;; not CNS. We use a dummy character which | |
280 | ;; can't occur in EUC-TW encoding to indicate | |
281 | ;; this. | |
282 | (write-repeat #xFF)))))))))) | |
283 | (if (r0 != #xA1) | |
284 | ;; single shift and CNS plane | |
285 | ((write #x8E) | |
286 | (write r0))) | |
;; Split the code in r1 into its high and low 7 bits and set the top bit
;; of each output byte.
287 | (write ((r1 >> 7) + #x80)) | |
288 | (write ((r1 % #x80) + #x80)) | |
289 | (repeat)))) | |
290 | "CCL program to encode EUC-TW encoding." | |
291 | ) | |
292 | ||
(defun euc-tw-pre-write-conversion (beg end)
  "Load `china-util' the first time `euc-tw' is used for writing.
BEG and END are accepted but ignored; no text is converted here.
Once the library is loaded, the `pre-write-conversion' property of
`euc-tw' is cleared so that this function is not called again.
Return nil."
  (prog1 nil
    ;; Loading china-util makes sure the translation table is available.
    (require 'china-util)
    ;; One-shot: remove ourselves from the coding system.
    (coding-system-put 'euc-tw 'pre-write-conversion nil)))
300 | ||
;; Define `euc-tw' (type 4, mnemonic ?Z).  The flags argument is a
;; (DECODER . ENCODER) pair naming the two CCL programs
;; `ccl-decode-euc-tw' and `ccl-encode-euc-tw'.  The properties mark the
;; Big5 and CNS charsets as safe, declare bytes 0..255 as valid codes, and
;; install `euc-tw-pre-write-conversion' as the pre-write function.
301 | (make-coding-system | |
302 | 'euc-tw 4 ?Z | |
303 | "ISO 2022 based EUC encoding for Chinese CNS11643. | |
304 | Big5 encoding is accepted for input also (which is then converted to CNS)." | |
305 | '(ccl-decode-euc-tw . ccl-encode-euc-tw) | |
306 | '((safe-charsets ascii | |
307 | chinese-big5-1 | |
308 | chinese-big5-2 | |
309 | chinese-cns11643-1 | |
310 | chinese-cns11643-2 | |
311 | chinese-cns11643-3 | |
312 | chinese-cns11643-4 | |
313 | chinese-cns11643-5 | |
314 | chinese-cns11643-6 | |
315 | chinese-cns11643-7) | |
316 | (valid-codes (0 . 255)) | |
317 | (pre-write-conversion . euc-tw-pre-write-conversion))) | |
318 | ||
;; `euc-taiwan' is an alternative name for `euc-tw'.
319 | (define-coding-system-alias 'euc-taiwan 'euc-tw) | |
320 | ||
;; Register the "Chinese-CNS" language environment: CNS planes 1 to 7,
;; with `iso-2022-cn' listed before `euc-tw' both as coding system and in
;; the detection priority.  The final argument lists "Chinese" as the
;; parent group.
4ed46869 | 321 | (set-language-info-alist |
a564ccf9 | 322 | "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 |
4138c943 KH |
323 | chinese-cns11643-3 chinese-cns11643-4 |
324 | chinese-cns11643-5 chinese-cns11643-6 | |
325 | chinese-cns11643-7) | |
285aac85 WL |
326 | (coding-system iso-2022-cn euc-tw) |
327 | (coding-priority iso-2022-cn euc-tw chinese-big5 | |
328 | chinese-iso-8bit) | |
a564ccf9 KH |
329 | (features china-util) |
330 | (input-method . "chinese-cns-quick") | |
285aac85 | 331 | (documentation . "\ |
9da20928 | 332 | Support for Chinese CNS character sets. Note that the EUC-TW coding system |
285aac85 | 333 | accepts Big5 for input also (which is then converted to CNS).")) |
4138c943 | 334 | '("Chinese")) |
4ed46869 | 335 | |
9da20928 DL |
;; Register the "Chinese-EUC-TW" language environment: CNS planes 1 to 7
;; plus the two Big5 charsets, with `euc-tw' listed first both as coding
;; system and in the detection priority.  The final argument lists
;; "Chinese" as the parent group.  The documentation string is shown to
;; users, so its spelling matters ("preferring").
336 | (set-language-info-alist |
337 | "Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2 | |
338 | chinese-cns11643-3 chinese-cns11643-4 | |
339 | chinese-cns11643-5 chinese-cns11643-6 | |
340 | chinese-cns11643-7 chinese-big5-1 chinese-big5-2) | |
341 | (coding-system euc-tw iso-2022-cn) | |
342 | (coding-priority euc-tw chinese-big5 iso-2022-cn | |
343 | chinese-iso-8bit) | |
344 | (features china-util) | |
345 | (input-method . "chinese-cns-quick") | |
346 | (documentation . "\ | |
347 | Support for Chinese, preferring the EUC-TW character set. Note that | |
348 | the EUC-TW coding system accepts Big5 for input also (which is then | |
349 | converted to CNS).")) | |
350 | '("Chinese")) | |
351 | ||
41da80b1 DL |
;; Announce the `chinese' feature so that (require 'chinese) succeeds.
352 | (provide 'chinese) |
353 | ||
ab5796a9 | 354 | ;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f |
4ed46869 | 355 | ;;; chinese.el ends here |