Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
eaa61218 KH |
3 | ;; Copyright (C) 2001, 2003 Free Software Foundation, Inc. |
4 | ;; Copyright (C) 1995, 1997, 1998 | |
5 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
6 | ;; Registration Number H14PRO021 | |
4ed46869 KH |
7 | |
8 | ;; Keywords: multilingual, Chinese | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation; either version 2, or (at your option) | |
15 | ;; any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
23 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
24 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
26 | |
27 | ;;; Commentary: | |
28 | ||
29 | ;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are | |
30 | ;; supported. | |
31 | ||
32 | ;;; Code: | |
33 | ||
34 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
35 | ;;; Chinese (general) | |
36 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
37 | ||
;; ISO-2022-CN: 7-bit ISO 2022 coding system covering ASCII, GB2312 and
;; CNS 11643 planes 1 and 2.
;;
;; NOTE(review): the slot labels below follow the order given for type-2
;; FLAGS in the `make-coding-system' docstring -- confirm against the
;; mule.el of the Emacs version in use.
(make-coding-system
 'iso-2022-cn
 2                                      ; coding system type
 ?C                                     ; mnemonic character
 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)."
 '(ascii                                    ; CHARSET0
   (nil chinese-gb2312 chinese-cns11643-1)  ; CHARSET1
   (nil chinese-cns11643-2)                 ; CHARSET2
   nil                                      ; CHARSET3
   nil                                      ; SHORT-FORM
   ascii-eol
   ascii-cntl
   seven
   locking-shift
   single-shift
   nil                                      ; USE-ROMAN
   nil                                      ; USE-OLDJIS
   nil                                      ; NO-ISO6429
   init-bol)
 '((safe-charsets ascii
                  chinese-gb2312
                  chinese-cns11643-1
                  chinese-cns11643-2)
   (mime-charset . iso-2022-cn)))

;; Descriptive name for the same coding system.
(define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn)
51 | ||
;; ISO-2022-CN-EXT: like `iso-2022-cn', but the fourth charset slot
;; additionally lists CNS 11643 planes 3 through 7.
;;
;; NOTE(review): the slot labels below follow the order given for type-2
;; FLAGS in the `make-coding-system' docstring -- confirm against the
;; mule.el of the Emacs version in use.
(make-coding-system
 'iso-2022-cn-ext
 2                                      ; coding system type
 ?C                                     ; mnemonic character
 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)."
 '(ascii                                    ; CHARSET0
   (nil chinese-gb2312 chinese-cns11643-1)  ; CHARSET1
   (nil chinese-cns11643-2)                 ; CHARSET2
   (nil chinese-cns11643-3                  ; CHARSET3
        chinese-cns11643-4
        chinese-cns11643-5
        chinese-cns11643-6
        chinese-cns11643-7)
   nil                                      ; SHORT-FORM
   ascii-eol
   ascii-cntl
   seven
   locking-shift
   single-shift
   nil                                      ; USE-ROMAN
   nil                                      ; USE-OLDJIS
   nil                                      ; NO-ISO6429
   init-bol)
 '((safe-charsets ascii
                  chinese-gb2312
                  chinese-cns11643-1
                  chinese-cns11643-2
                  chinese-cns11643-3
                  chinese-cns11643-4
                  chinese-cns11643-5
                  chinese-cns11643-6
                  chinese-cns11643-7)
   (mime-charset . iso-2022-cn-ext)))
66 | ||
335a7ad7 | 67 | \f |
4ed46869 | 68 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
a1506d29 | 69 | ;;; Chinese GB2312 (simplified) |
4ed46869 KH |
70 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
71 | ||
;; EUC-style 8-bit coding system for GB2312.
;;
;; NOTE(review): the slot labels below follow the order given for type-2
;; FLAGS in the `make-coding-system' docstring -- confirm against the
;; mule.el of the Emacs version in use.
(make-coding-system
 'chinese-iso-8bit
 2                                      ; coding system type
 ?c                                     ; mnemonic character
 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)."
 '(ascii                                ; CHARSET0
   chinese-gb2312                       ; CHARSET1
   nil                                  ; CHARSET2
   nil                                  ; CHARSET3
   nil                                  ; SHORT-FORM
   ascii-eol
   ascii-cntl
   nil                                  ; SEVEN
   nil                                  ; LOCKING-SHIFT
   nil                                  ; SINGLE-SHIFT
   nil)                                 ; USE-ROMAN
 '((safe-charsets ascii chinese-gb2312)
   (mime-charset . gb2312)))

;; Alternative names under which the same coding system is reachable.
;; They are registered in the order listed.
(dolist (alias '(cn-gb-2312 euc-china euc-cn cn-gb gb2312 cp936))
  (define-coding-system-alias alias 'chinese-iso-8bit))
f3f18123 | 86 | |
;; HZ coding system.  No FLAGS are given (nil); the conversion work is
;; delegated to the two hook functions named in the property list.
(make-coding-system
 'chinese-hz
 0                                      ; coding system type
 ?z                                     ; mnemonic character
 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)."
 nil
 '((safe-charsets ascii chinese-gb2312)
   (mime-charset . hz-gb-2312)
   (post-read-conversion . post-read-decode-hz)
   (pre-write-conversion . pre-write-encode-hz)))

;; Alternative names for `chinese-hz', registered in the order listed.
(dolist (alias '(hz-gb-2312 hz))
  (define-coding-system-alias alias 'chinese-hz))
4ed46869 KH |
98 | |
(defun post-read-decode-hz (len)
  "Decode the HZ text between point and point plus LEN.
The decoding itself is done by `decode-hz-region'; its return value
is returned unchanged.  The buffer's modified flag is put back to the
state it had on entry, and `last-coding-system-used' is bound locally
so that the decoding does not change its outer value."
  (let* ((start (point))
         (was-modified (buffer-modified-p))
         (last-coding-system-used nil)
         (result (decode-hz-region start (+ start len))))
    ;; Decoding edits the buffer; restore the flag seen on entry.
    (set-buffer-modified-p was-modified)
    result))
4ed46869 KH |
106 | |
(defun pre-write-encode-hz (from to)
  "Copy the text to be written into a scratch buffer and HZ-encode it.
If FROM is a string, that string is the text; otherwise FROM and TO
delimit the text in the buffer that is current on entry.  The text is
inserted into a newly generated buffer named \" *temp*\", which is
made current and encoded in place with `encode-hz-region'.

The scratch buffer is left current on return, and the value is nil.
NOTE(review): presumably the caller of a `pre-write-conversion'
function picks the converted text up from the current buffer --
confirm against the Emacs version in use."
  (let ((source-buffer (current-buffer))
        (work-buffer (generate-new-buffer " *temp*")))
    (set-buffer work-buffer)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source-buffer from to)))
    ;; Keep the encoding step from changing the outer value of
    ;; `last-coding-system-used'.
    (let ((last-coding-system-used nil))
      (encode-hz-region 1 (point-max)))
    nil))
116 | ||
;; Language environment "Chinese-GB", filed under the "Chinese" parent.
(set-language-info-alist
 "Chinese-GB"
 '((charset chinese-gb2312 chinese-sisheng)
   (coding-system chinese-iso-8bit iso-2022-cn chinese-hz)
   (coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn)
   (input-method . "chinese-py-punct")
   ;; Library named as a required feature of this environment.
   (features china-util)
   (sample-text . "Chinese (\e$AVPND\e(B,\e$AFUM(;0\e(B,\e$A::So\e(B)	\e$ADc:C\e(B")
   (documentation . "Support for Chinese GB2312 character set.")
   (tutorial . "TUTORIAL.cn"))
 '("Chinese"))
4ed46869 KH |
127 | |
128 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
129 | ;; Chinese BIG5 (traditional) | |
130 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
131 | ||
;; Big5 coding system.  No FLAGS are given (nil).
(make-coding-system
 'chinese-big5
 3                                      ; coding system type
 ?B                                     ; mnemonic character
 "BIG5 8-bit encoding for Chinese (MIME:Big5)."
 nil
 '((safe-charsets ascii chinese-big5-1 chinese-big5-2)
   (mime-charset . big5)
   ;; Both Emacs charsets are attributed to "BIG5" and share the same
   ;; encoder function.
   (charset-origin-alist
    (chinese-big5-1 "BIG5" encode-big5-char)
    (chinese-big5-2 "BIG5" encode-big5-char))))

;; Alternative names for `chinese-big5', registered in the order listed.
(dolist (alias '(big5 cn-big5 cp950))
  (define-coding-system-alias alias 'chinese-big5))
f3f18123 | 144 | |
4ed46869 KH |
;; Fonts in Big5 layout are indexed differently from the two 94x94
;; charsets, so a CCL program converts the position codes.
(define-ccl-program ccl-encode-big5-font
  `(0
    ;; Registers on entry:
    ;;   R0 -- charset (chinese-big5-1 or chinese-big5-2)
    ;;   R1 -- position code 1
    ;;   R2 -- position code 2
    ;; Registers on exit:
    ;;   R1 -- font code point 1
    ;;   R2 -- font code point 2
    (;; Linear index of the character within its 94x94 charset.
     (r2 = ((((r1 - #x21) * 94) + r2) - #x21))
     ;; Characters of the second charset are shifted by 6280
     ;; (= 40 rows of 157 cells) in the font's numbering.
     (if (r0 == ,(charset-id 'chinese-big5-2))
         (r2 += 6280))
     ;; Rows hold 157 cells each; the first byte starts at #xA1.
     (r1 = ((r2 / 157) + #xA1))
     (r2 %= 157)
     ;; Cells 0..62 map to #x40..#x7E, cells 63..156 to #xA1..#xFE.
     (if (r2 < #x3F)
         (r2 += #x40)
       (r2 += #x62))))
  "CCL program to encode a Big5 code to code point of Big5 font.")

;; Put the encoder at the front of the list, keyed by "big5".
(setq font-ccl-encoder-alist
      (cons `("big5" . ,ccl-encode-big5-font)
            font-ccl-encoder-alist))
162 | ||
;; Language environment "Chinese-BIG5", filed under the "Chinese" parent.
(set-language-info-alist
 "Chinese-BIG5"
 '((charset chinese-big5-1 chinese-big5-2)
   (coding-system chinese-big5 chinese-iso-7bit)
   (coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit)
   (input-method . "chinese-py-punct-b5")
   ;; Library named as a required feature of this environment.
   (features china-util)
   (sample-text . "Cantonese (\e$(0GnM$\e(B,\e$(0N]0*Hd\e(B)	\e$(0*/=(\e(B, \e$(0+$)p\e(B")
   (documentation . "Support for Chinese Big5 character set.")
   (tutorial . "TUTORIAL.zh"))
 '("Chinese"))
4ed46869 KH |
173 | |
174 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
175 | ;; Chinese CNS11643 (traditional) | |
176 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
177 | ||
285aac85 WL |
(defvar big5-to-cns (make-translation-table)
  "Translation table for encoding to `euc-tw'.")

;; Register the table under the symbol `big5-to-cns', but only when the
;; symbol has no `translation-table' property yet: china-util may have
;; been loaded earlier and already have done this.
(or (get 'big5-to-cns 'translation-table)
    (define-translation-table 'big5-to-cns big5-to-cns))
183 | ||
(define-ccl-program ccl-decode-euc-tw
  ;; Why the buffer magnification is 2: in EUC-TW, CNS plane 1 takes
  ;; two or four bytes and planes 2 to 7 always take four; in Emacs's
  ;; internal encoding, planes 1 and 2 take three bytes and planes 3
  ;; to 7 take four.  A factor of 2 therefore suffices in both
  ;; directions.
  `(2
    (;; Only a few registers are free, so just the first three
     ;; charset ids are cached; the rest are inlined where used.
     (r4 = ,(charset-id 'chinese-cns11643-1))
     (r5 = ,(charset-id 'chinese-cns11643-2))
     (r6 = ,(charset-id 'chinese-cns11643-3))
     (loop
      (read-if (r0 < #x80)
          ;; Below #x80: ASCII, passed through unchanged.
          (write-repeat r0)
        (if (r0 == #x8E)
            ;; #x8E is the single shift that introduces the
            ;; four-byte form: #x8E, plane byte, two code bytes.
            (read-if (r1 < #xA1)
                ;; Plane byte too small: emit both bytes as they are.
                ((write r0)
                 (write-repeat r1))
              (if (r1 > #xA7)
                  ;; Plane byte beyond plane 7: emit both bytes as
                  ;; they are.
                  ((write r0)
                   (write-repeat r1))
                ;; Plane byte is within #xA1..#xA7.
                (read-if (r2 < #xA1)
                    ;; Bad first code byte: emit what was read.
                    ((write r0 r1)
                     (write-repeat r2))
                  (read-if (r3 < #xA1)
                      ;; Bad second code byte: emit what was read.
                      ((write r0 r1 r2)
                       (write-repeat r3))
                    ;; Complete four-byte sequence.  Turn the plane
                    ;; byte into a charset id, then pack the two
                    ;; code bytes (high bit stripped) into one
                    ;; 14-bit value.
                    ((branch (r1 - #xA1)
                       (r1 = r4)
                       (r1 = r5)
                       (r1 = r6)
                       (r1 = ,(charset-id 'chinese-cns11643-4))
                       (r1 = ,(charset-id 'chinese-cns11643-5))
                       (r1 = ,(charset-id 'chinese-cns11643-6))
                       (r1 = ,(charset-id 'chinese-cns11643-7)))
                     (r2 = ((((r2 - #x80) << 7) + r3) - #x80))
                     (write-multibyte-character r1 r2)
                     (repeat))))))
          ;; No single shift: the plain two-byte EUC form.
          (if (r0 < #xA1)
              ;; Bad first byte: emit it as it is.
              (write-repeat r0)
            (read-if (r1 < #xA1)
                ;; Bad second byte: emit both bytes as they are.
                ((write r0)
                 (write-repeat r1))
              ;; Two-byte sequence: always CNS plane 1 (id in r4).
              ((r1 = ((((r0 - #x80) << 7) + r1) - #x80))
               (write-multibyte-character r4 r1)
               (repeat)))))))))
  "CCL program to decode EUC-TW encoding."
  )
245 | ||
(define-ccl-program ccl-encode-euc-tw
  `(2
    (;; Only a few registers are free, so just these charset ids are
     ;; cached; the remaining CNS planes are inlined where compared.
     (r2 = ,(charset-id 'ascii))
     (r3 = ,(charset-id 'chinese-big5-1))
     (r4 = ,(charset-id 'chinese-big5-2))
     (r5 = ,(charset-id 'chinese-cns11643-1))
     (r6 = ,(charset-id 'chinese-cns11643-2))
     (loop
      ;; r0 receives the charset id, r1 the code within that charset.
      (read-multibyte-character r0 r1)
      (if (r0 == r2)
          ;; ASCII is written through unchanged.
          (write-repeat r1)
        (;; A character from either Big5 charset is first mapped to
         ;; CNS through the `big5-to-cns' translation table.
         (if (r0 == r3)
             (translate-character big5-to-cns r0 r1)
           (if (r0 == r4)
               (translate-character big5-to-cns r0 r1)))
         ;; Replace the charset id in r0 by the EUC-TW plane byte,
         ;; #xA1 for plane 1 up to #xA7 for plane 7.
         (if (r0 == r5)
             (r0 = #xA1)
           (if (r0 == r6)
               (r0 = #xA2)
             (if (r0 == ,(charset-id 'chinese-cns11643-3))
                 (r0 = #xA3)
               (if (r0 == ,(charset-id 'chinese-cns11643-4))
                   (r0 = #xA4)
                 (if (r0 == ,(charset-id 'chinese-cns11643-5))
                     (r0 = #xA5)
                   (if (r0 == ,(charset-id 'chinese-cns11643-6))
                       (r0 = #xA6)
                     (if (r0 == ,(charset-id 'chinese-cns11643-7))
                         (r0 = #xA7)
                       ;; Not a CNS character.  Emit #xFF, a byte
                       ;; that cannot occur in EUC-TW, as a marker
                       ;; and go on to the next character.
                       (write-repeat #xFF))))))))))
      ;; Every plane but plane 1 is written in the four-byte form:
      ;; single shift #x8E followed by the plane byte.
      (if (r0 != #xA1)
          ((write #x8E)
           (write r0)))
      ;; The two code bytes: upper and lower 7 bits of r1, each with
      ;; #x80 added.
      (write ((r1 >> 7) + #x80))
      (write ((r1 % #x80) + #x80))
      (repeat))))
  "CCL program to encode EUC-TW encoding."
  )
290 | ||
(defun euc-tw-pre-write-conversion (beg end)
  "Load `china-util' on the first write that uses `euc-tw'.
BEG and END are accepted but not used; no text is converted here.
After requiring the library, the function clears the
`pre-write-conversion' property of `euc-tw', so it is not called for
later writes.  The value is always nil."
  (prog1 nil
    ;; china-util supplies the translation table used when encoding.
    (require 'china-util)
    ;; One load is enough: unhook ourselves.
    (coding-system-put 'euc-tw 'pre-write-conversion nil)))
298 | ||
;; EUC-TW coding system, implemented by the decoder/encoder pair of CCL
;; programs named in the FLAGS argument.
(make-coding-system
 'euc-tw
 4                                      ; coding system type
 ?Z                                     ; mnemonic character
 "ISO 2022 based EUC encoding for Chinese CNS11643.
Big5 encoding is accepted for input also (which is then converted to CNS)."
 '(ccl-decode-euc-tw . ccl-encode-euc-tw)
 '((safe-charsets ascii
                  chinese-big5-1 chinese-big5-2
                  chinese-cns11643-1 chinese-cns11643-2
                  chinese-cns11643-3 chinese-cns11643-4
                  chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
   (valid-codes (0 . 255))
   ;; Hook that loads china-util before the first write.
   (pre-write-conversion . euc-tw-pre-write-conversion)))

;; Longer name for the same coding system.
(define-coding-system-alias 'euc-taiwan 'euc-tw)
318 | ||
;; Language environment "Chinese-CNS", filed under the "Chinese" parent.
(set-language-info-alist
 "Chinese-CNS"
 '((charset chinese-cns11643-1
            chinese-cns11643-2
            chinese-cns11643-3
            chinese-cns11643-4
            chinese-cns11643-5
            chinese-cns11643-6
            chinese-cns11643-7)
   (coding-system iso-2022-cn euc-tw)
   (coding-priority iso-2022-cn euc-tw chinese-big5 chinese-iso-8bit)
   ;; Library named as a required feature of this environment.
   (features china-util)
   (input-method . "chinese-cns-quick")
   (documentation . "\
Support for Chinese CNS character sets. Note that the EUC-TW coding system
accepts Big5 for input also (which is then converted to CNS)."))
 '("Chinese"))
4ed46869 | 333 | |
9da20928 DL |
;; Language environment "Chinese-EUC-TW", filed under the "Chinese"
;; parent.  It lists the same CNS charsets as "Chinese-CNS" plus the two
;; Big5 charsets, and puts `euc-tw' first in both coding-system lists.
(set-language-info-alist
 "Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2
			     chinese-cns11643-3 chinese-cns11643-4
			     chinese-cns11643-5 chinese-cns11643-6
			     chinese-cns11643-7 chinese-big5-1 chinese-big5-2)
		    (coding-system euc-tw iso-2022-cn)
		    (coding-priority euc-tw chinese-big5 iso-2022-cn
				     chinese-iso-8bit)
		    ;; Library named as a required feature of this
		    ;; environment.
		    (features china-util)
		    (input-method . "chinese-cns-quick")
		    ;; User-visible text; the spelling "preferring" is
		    ;; corrected from the earlier "prefering".
		    (documentation . "\
Support for Chinese, preferring the EUC-TW character set. Note that
the EUC-TW coding system accepts Big5 for input also (which is then
converted to CNS)."))
 '("Chinese"))

;; Announce the feature so that (require 'chinese) succeeds.
(provide 'chinese)
351 | ||
ab5796a9 | 352 | ;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f |
4ed46869 | 353 | ;;; chinese.el ends here |