Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; chinese.el --- support for Chinese -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
4ed46869 | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
fa526c4a | 4 | ;; Licensed to the Free Software Foundation. |
4ed46869 KH |
5 | |
6 | ;; Keywords: multilingual, Chinese | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
24 | |
25 | ;;; Commentary: | |
26 | ||
27 | ;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are | |
28 | ;; supported. | |
29 | ||
30 | ;;; Code: | |
31 | ||
32 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
33 | ;;; Chinese (general) | |
34 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
35 | ||
36 | (make-coding-system | |
4138c943 | 37 | 'iso-2022-cn 2 ?C |
285aac85 | 38 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)." |
4138c943 KH |
39 | '(ascii |
40 | (nil chinese-gb2312 chinese-cns11643-1) | |
41 | (nil chinese-cns11643-2) | |
42 | nil | |
43 | nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | |
44 | init-bol) | |
45 | '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2) | |
46 | (mime-charset . iso-2022-cn))) | |
47 | ||
48 | (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) | |
49 | ||
50 | (make-coding-system | |
51 | 'iso-2022-cn-ext 2 ?C | |
285aac85 | 52 | "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)." |
4ed46869 KH |
53 | '(ascii |
54 | (nil chinese-gb2312 chinese-cns11643-1) | |
55 | (nil chinese-cns11643-2) | |
56 | (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
57 | chinese-cns11643-6 chinese-cns11643-7) | |
f3f18123 | 58 | nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil |
a18aa841 | 59 | init-bol) |
4138c943 KH |
60 | '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2 |
61 | chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
62 | chinese-cns11643-6 chinese-cns11643-7) | |
63 | (mime-charset . iso-2022-cn-ext))) | |
64 | ||
335a7ad7 | 65 | \f |
4ed46869 | 66 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
a1506d29 | 67 | ;;; Chinese GB2312 (simplified) |
4ed46869 KH |
68 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
69 | ||
70 | (make-coding-system | |
4b9121fc | 71 | 'chinese-iso-8bit 2 ?c |
285aac85 | 72 | "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)." |
4138c943 KH |
73 | '(ascii chinese-gb2312 nil nil |
74 | nil ascii-eol ascii-cntl nil nil nil nil) | |
75 | '((safe-charsets ascii chinese-gb2312) | |
ff890e66 | 76 | (mime-charset . gb2312))) |
4ed46869 | 77 | |
71eabd24 RS |
78 | (define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit) |
79 | (define-coding-system-alias 'euc-china 'chinese-iso-8bit) | |
a18aa841 | 80 | (define-coding-system-alias 'euc-cn 'chinese-iso-8bit) |
4f35555a KH |
81 | (define-coding-system-alias 'cn-gb 'chinese-iso-8bit) |
82 | (define-coding-system-alias 'gb2312 'chinese-iso-8bit) | |
f3f18123 | 83 | |
4ed46869 | 84 | (make-coding-system |
4b9121fc | 85 | 'chinese-hz 0 ?z |
285aac85 | 86 | "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)." |
a18aa841 | 87 | nil |
4138c943 KH |
88 | '((safe-charsets ascii chinese-gb2312) |
89 | (mime-charset . hz-gb-2312) | |
90 | (post-read-conversion . post-read-decode-hz) | |
91 | (pre-write-conversion . pre-write-encode-hz))) | |
f3f18123 | 92 | |
71eabd24 RS |
93 | (define-coding-system-alias 'hz-gb-2312 'chinese-hz) |
94 | (define-coding-system-alias 'hz 'chinese-hz) | |
4ed46869 KH |
95 | |
96 | (defun post-read-decode-hz (len) | |
69f24acf KH |
97 | (let ((pos (point)) |
98 | (buffer-modified-p (buffer-modified-p)) | |
99 | last-coding-system-used) | |
100 | (prog1 | |
101 | (decode-hz-region pos (+ pos len)) | |
102 | (set-buffer-modified-p buffer-modified-p)))) | |
4ed46869 KH |
103 | |
104 | (defun pre-write-encode-hz (from to) | |
1944b2e7 | 105 | (let ((buf (current-buffer))) |
d64a0ef7 KH |
106 | (set-buffer (generate-new-buffer " *temp*")) |
107 | (if (stringp from) | |
108 | (insert from) | |
109 | (insert-buffer-substring buf from to)) | |
110 | (let (last-coding-system-used) | |
111 | (encode-hz-region 1 (point-max))) | |
4ed46869 KH |
112 | nil)) |
113 | ||
4ed46869 | 114 | (set-language-info-alist |
a564ccf9 | 115 | "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) |
4138c943 KH |
116 | (coding-system chinese-iso-8bit iso-2022-cn chinese-hz) |
117 | (coding-priority chinese-iso-8bit chinese-big5 iso-2022-cn) | |
a564ccf9 KH |
118 | (input-method . "chinese-py-punct") |
119 | (features china-util) | |
fab8252e | 120 | (sample-text . "Chinese (\e$AVPND\e(B,\e$AFUM(;0\e(B,\e$A::So\e(B) \e$ADc:C\e(B") |
4138c943 KH |
121 | (documentation . "Support for Chinese GB2312 character set.")) |
122 | '("Chinese")) | |
4ed46869 KH |
123 | |
124 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
125 | ;; Chinese BIG5 (traditional) | |
126 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
127 | ||
128 | (make-coding-system | |
285aac85 WL |
129 | 'chinese-big5 3 ?B |
130 | "BIG5 8-bit encoding for Chinese (MIME:Big5)." | |
4138c943 KH |
131 | nil |
132 | '((safe-charsets ascii chinese-big5-1 chinese-big5-2) | |
ff890e66 | 133 | (mime-charset . big5) |
d4c97509 KH |
134 | (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) |
135 | (chinese-big5-2 "BIG5" encode-big5-char)))) | |
4ed46869 | 136 | |
71eabd24 RS |
137 | (define-coding-system-alias 'big5 'chinese-big5) |
138 | (define-coding-system-alias 'cn-big5 'chinese-big5) | |
f3f18123 | 139 | |
4ed46869 KH |
140 | ;; Big5 font requires special encoding. |
141 | (define-ccl-program ccl-encode-big5-font | |
142 | `(0 | |
143 | ;; In: R0:chinese-big5-1 or chinese-big5-2 | |
144 | ;; R1:position code 1 | |
145 | ;; R2:position code 2 | |
146 | ;; Out: R1:font code point 1 | |
147 | ;; R2:font code point 2 | |
148 | ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21)) | |
149 | (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280)) | |
150 | (r1 = ((r2 / 157) + ?\xA1)) | |
151 | (r2 %= 157) | |
152 | (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62)))) | |
153 | "CCL program to encode a Big5 code to code point of Big5 font.") | |
154 | ||
155 | (setq font-ccl-encoder-alist | |
156 | (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)) | |
157 | ||
4ed46869 | 158 | (set-language-info-alist |
a564ccf9 | 159 | "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) |
4138c943 KH |
160 | (coding-system chinese-big5 chinese-iso-7bit) |
161 | (coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit) | |
a564ccf9 KH |
162 | (input-method . "chinese-py-punct-b5") |
163 | (features china-util) | |
fab8252e | 164 | (sample-text . "Cantonese (\e$(0GnM$\e(B,\e$(0N]0*Hd\e(B) \e$(0*/=(\e(B, \e$(0+$)p\e(B") |
4138c943 KH |
165 | (documentation . "Support for Chinese Big5 character set.")) |
166 | '("Chinese")) | |
4ed46869 KH |
167 | |
168 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
169 | ;; Chinese CNS11643 (traditional) | |
170 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
171 | ||
285aac85 WL |
172 | (defvar big5-to-cns (make-translation-table) |
173 | "Translation table for encoding to `euc-tw'.") | |
174 | ;; Could have been done by china-util loaded before. | |
175 | (unless (get 'big5-to-cns 'translation-table) | |
176 | (define-translation-table 'big5-to-cns big5-to-cns)) | |
177 | ||
178 | (define-ccl-program ccl-decode-euc-tw | |
179 | ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding; | |
180 | ;; CNS planes 2 to 7 always need four bytes. In internal encoding of | |
181 | ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need | |
182 | ;; four bytes. Thus a buffer magnification value of 2 (for both | |
183 | ;; encoding and decoding) is sufficient. | |
184 | `(2 | |
185 | ;; we don't have enough registers to hold all charset-ids | |
186 | ((r4 = ,(charset-id 'chinese-cns11643-1)) | |
187 | (r5 = ,(charset-id 'chinese-cns11643-2)) | |
188 | (r6 = ,(charset-id 'chinese-cns11643-3)) | |
189 | (loop | |
190 | (read-if (r0 < #x80) | |
191 | ;; ASCII | |
192 | (write-repeat r0) | |
193 | ;; not ASCII | |
194 | (if (r0 == #x8E) | |
195 | ;; single shift | |
196 | (read-if (r1 < #xA1) | |
197 | ;; invalid byte | |
198 | ((write r0) | |
199 | (write-repeat r1)) | |
200 | (if (r1 > #xA7) | |
201 | ;; invalid plane | |
202 | ((write r0) | |
203 | (write-repeat r1)) | |
204 | ;; OK, we have a plane | |
205 | (read-if (r2 < #xA1) | |
206 | ;; invalid first byte | |
207 | ((write r0 r1) | |
208 | (write-repeat r2)) | |
209 | (read-if (r3 < #xA1) | |
210 | ;; invalid second byte | |
211 | ((write r0 r1 r2) | |
212 | (write-repeat r3)) | |
213 | ;; CNS 1-7, finally | |
214 | ((branch (r1 - #xA1) | |
215 | (r1 = r4) | |
216 | (r1 = r5) | |
217 | (r1 = r6) | |
218 | (r1 = ,(charset-id 'chinese-cns11643-4)) | |
219 | (r1 = ,(charset-id 'chinese-cns11643-5)) | |
220 | (r1 = ,(charset-id 'chinese-cns11643-6)) | |
221 | (r1 = ,(charset-id 'chinese-cns11643-7))) | |
222 | (r2 = ((((r2 - #x80) << 7) + r3) - #x80)) | |
223 | (write-multibyte-character r1 r2) | |
224 | (repeat)))))) | |
225 | ;; standard EUC | |
226 | (if (r0 < #xA1) | |
227 | ;; invalid first byte | |
228 | (write-repeat r0) | |
229 | (read-if (r1 < #xA1) | |
230 | ;; invalid second byte | |
231 | ((write r0) | |
232 | (write-repeat r1)) | |
233 | ;; CNS 1, finally | |
234 | ((r1 = ((((r0 - #x80) << 7) + r1) - #x80)) | |
235 | (write-multibyte-character r4 r1) | |
236 | (repeat))))))))) | |
237 | "CCL program to decode EUC-TW encoding." | |
238 | ) | |
239 | ||
240 | (define-ccl-program ccl-encode-euc-tw | |
241 | `(2 | |
242 | ;; we don't have enough registers to hold all charset-ids | |
243 | ((r2 = ,(charset-id 'ascii)) | |
244 | (r3 = ,(charset-id 'chinese-big5-1)) | |
245 | (r4 = ,(charset-id 'chinese-big5-2)) | |
246 | (r5 = ,(charset-id 'chinese-cns11643-1)) | |
247 | (r6 = ,(charset-id 'chinese-cns11643-2)) | |
248 | (loop | |
249 | (read-multibyte-character r0 r1) | |
250 | (if (r0 == r2) | |
251 | (write-repeat r1) | |
252 | (;; Big 5 encoded characters are first translated to CNS | |
253 | (if (r0 == r3) | |
254 | (translate-character big5-to-cns r0 r1) | |
255 | (if (r0 == r4) | |
256 | (translate-character big5-to-cns r0 r1))) | |
257 | (if (r0 == r5) | |
258 | (r0 = #xA1) | |
259 | (if (r0 == r6) | |
260 | (r0 = #xA2) | |
261 | (if (r0 == ,(charset-id 'chinese-cns11643-3)) | |
262 | (r0 = #xA3) | |
263 | (if (r0 == ,(charset-id 'chinese-cns11643-4)) | |
264 | (r0 = #xA4) | |
265 | (if (r0 == ,(charset-id 'chinese-cns11643-5)) | |
266 | (r0 = #xA5) | |
267 | (if (r0 == ,(charset-id 'chinese-cns11643-6)) | |
268 | (r0 = #xA6) | |
269 | (if (r0 == ,(charset-id 'chinese-cns11643-7)) | |
270 | (r0 = #xA7) | |
271 | ;; not CNS. We use a dummy character which | |
272 | ;; can't occur in EUC-TW encoding to indicate | |
273 | ;; this. | |
274 | (write-repeat #xFF)))))))))) | |
275 | (if (r0 != #xA1) | |
276 | ;; single shift and CNS plane | |
277 | ((write #x8E) | |
278 | (write r0))) | |
279 | (write ((r1 >> 7) + #x80)) | |
280 | (write ((r1 % #x80) + #x80)) | |
281 | (repeat)))) | |
282 | "CCL program to encode EUC-TW encoding." | |
283 | ) | |
284 | ||
285 | (defun euc-tw-pre-write-conversion (beg end) | |
286 | "Semi-dummy pre-write function that effectively autoloads china-util." | |
287 | ;; Ensure translation table is loaded. | |
288 | (require 'china-util) | |
289 | ;; Don't do this again. | |
290 | (coding-system-put 'euc-tw 'pre-write-conversion nil) | |
291 | nil) | |
292 | ||
293 | (make-coding-system | |
294 | 'euc-tw 4 ?Z | |
295 | "ISO 2022 based EUC encoding for Chinese CNS11643. | |
296 | Big5 encoding is accepted for input also (which is then converted to CNS)." | |
297 | '(ccl-decode-euc-tw . ccl-encode-euc-tw) | |
298 | '((safe-charsets ascii | |
299 | chinese-big5-1 | |
300 | chinese-big5-2 | |
301 | chinese-cns11643-1 | |
302 | chinese-cns11643-2 | |
303 | chinese-cns11643-3 | |
304 | chinese-cns11643-4 | |
305 | chinese-cns11643-5 | |
306 | chinese-cns11643-6 | |
307 | chinese-cns11643-7) | |
308 | (valid-codes (0 . 255)) | |
309 | (pre-write-conversion . euc-tw-pre-write-conversion))) | |
310 | ||
311 | (define-coding-system-alias 'euc-taiwan 'euc-tw) | |
312 | ||
4ed46869 | 313 | (set-language-info-alist |
a564ccf9 | 314 | "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 |
4138c943 KH |
315 | chinese-cns11643-3 chinese-cns11643-4 |
316 | chinese-cns11643-5 chinese-cns11643-6 | |
317 | chinese-cns11643-7) | |
285aac85 WL |
318 | (coding-system iso-2022-cn euc-tw) |
319 | (coding-priority iso-2022-cn euc-tw chinese-big5 | |
320 | chinese-iso-8bit) | |
a564ccf9 KH |
321 | (features china-util) |
322 | (input-method . "chinese-cns-quick") | |
285aac85 | 323 | (documentation . "\ |
9da20928 | 324 | Support for Chinese CNS character sets. Note that the EUC-TW coding system |
285aac85 | 325 | accepts Big5 for input also (which is then converted to CNS).")) |
4138c943 | 326 | '("Chinese")) |
4ed46869 | 327 | |
9da20928 DL |
328 | (set-language-info-alist |
329 | "Chinese-EUC-TW" '((charset chinese-cns11643-1 chinese-cns11643-2 | |
330 | chinese-cns11643-3 chinese-cns11643-4 | |
331 | chinese-cns11643-5 chinese-cns11643-6 | |
332 | chinese-cns11643-7 chinese-big5-1 chinese-big5-2) | |
333 | (coding-system euc-tw iso-2022-cn) | |
334 | (coding-priority euc-tw chinese-big5 iso-2022-cn | |
335 | chinese-iso-8bit) | |
336 | (features china-util) | |
337 | (input-method . "chinese-cns-quick") | |
338 | (documentation . "\ | |
339 | Support for Chinese, preferring the EUC-TW character set. Note that | |
340 | the EUC-TW coding system accepts Big5 for input also (which is then | |
341 | converted to CNS).")) | |
342 | '("Chinese")) | |
343 | ||
41da80b1 DL |
344 | (provide 'chinese) |
345 | ||
ab5796a9 | 346 | ;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f |
4ed46869 | 347 | ;;; chinese.el ends here |