Commit | Line | Data |
---|---|---|
4ed46869 KH |
1 | ;; china-util.el -- utilities for Chinese |
2 | ||
3 | ;; Copyright (C) 1995 Free Software Foundation, Inc. | |
4 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
5 | ||
6 | ;; Keywords: mule, multilingual, Chinese | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
24 | |
25 | ;;; Code: | |
26 | ||
27 | ;; HZ/ZW encoding stuff | |
28 | ||
29 | ;; HZ is an encoding method for the Chinese character set GB2312 that | |
30 | ;; is widely used on the Internet. It is very similar to the 7-bit | |
31 | ;; environment of ISO-2022. The difference is that HZ uses the sequences | |
32 | ;; "~{" and "~}" to designate GB2312 and ASCII respectively; hence, it | |
33 | ;; doesn't use the ESC (0x1B) code. | |
34 | ||
35 | ;; ZW is another encoding method for the Chinese character set GB2312. It | |
36 | ;; encodes Chinese characters line by line, starting each line with | |
37 | ;; the sequence "zW". Like HZ, it uses only 7-bit codes. | |
38 | ||
(defvar iso2022-gb-designation "\e$A"
  "ISO-2022 escape sequence to designate GB2312.")

(defvar hz-gb-designnation "~{"
  "HZ escape sequence to designate GB2312.")

(defvar iso2022-ascii-designation "\e(B"
  "ISO-2022 escape sequence to designate ASCII.")

(defvar hz-ascii-designnation "~}"
  "HZ escape sequence to designate ASCII.")

(defvar zw-start-gb "^zW"
  "Regexp of the ZW sequence that starts GB2312 text.")

(defvar hz/zw-start-gb
  (concat hz-gb-designnation "\\|" zw-start-gb)
  "Regexp for the start of GB2312 in a mixture of HZ and ZW encodings.
It is the alternation of `hz-gb-designnation' and `zw-start-gb'.")

(defvar decode-hz-line-continuation nil
  "Flag telling whether to honor the line continuation convention of HZ.
When nil, `decode-hz-region' inserts the ASCII designation after a
\"~{\" that has no \"~}\" later on the same line.  When non-nil, it
inserts nothing in that case.")
54 | ||
55 | ;;;###autoload | |
(defun decode-hz-region (beg end)
  "Decode HZ/ZW encoded text in the current region.
BEG and END delimit the region.

The HZ sequences \"~{\" and \"~}\" and the ZW line prefix \"zW\" are
rewritten as the ISO-2022 designation sequences held in
`iso2022-gb-designation' and `iso2022-ascii-designation', and the
text is then decoded with the `euc-china' coding system.  The text
is also decoded when no HZ/ZW marker was found but it contains a
byte in the range 0xA0-0xFF.  Finally each \"~~\" is reduced to \"~\".

The variable `decode-hz-line-continuation' controls what happens to
a \"~{\" that has no \"~}\" later on the same line.

Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)

      ;; The HZ/ZW markers are case-sensitive.  Without this binding a
      ;; buffer whose `case-fold-search' is non-nil would also treat
      ;; lines starting with "zw", "ZW" or "Zw" as GB2312 text.
      (let ((case-fold-search nil)
            (chinese-found nil))

        ;; We, at first, rewrite the HZ/ZW markers as ISO-2022
        ;; designation sequences, then decode the text.

        ;; "~\n" -> "\n"
        (goto-char (point-min))
        (while (search-forward "~" nil t)
          (if (= (following-char) ?\n) (delete-char -1))
          ;; Step over the next character so that it is not examined
          ;; again as the start of another "~" sequence.
          (if (not (eobp)) (forward-char 1)))

        ;; "^zW...\n" -> Chinese GB2312
        ;; "~{...~}"  -> Chinese GB2312
        (goto-char (point-min))
        (while (re-search-forward hz/zw-start-gb nil t)
          (if (= (char-after (match-beginning 0)) ?z)
              ;; ZW -> ISO-2022: the whole rest of the line is GB2312.
              (progn
                (replace-match iso2022-gb-designation t t)
                (end-of-line)
                (insert iso2022-ascii-designation))
            ;; HZ -> ISO-2022: GB2312 runs up to "~}" on this line.
            (replace-match iso2022-gb-designation t t)
            (let ((pos (save-excursion (end-of-line) (point))))
              (if (search-forward hz-ascii-designnation pos t)
                  (replace-match iso2022-ascii-designation t t)
                ;; No "~}" on this line.  Unless line continuation is
                ;; honored, switch back to ASCII at point.
                (if (not decode-hz-line-continuation)
                    (insert iso2022-ascii-designation)))))
          (setq chinese-found t))

        (if (or chinese-found
                (let ((enable-multibyte-characters nil))
                  ;; Here we check if the text contains EUC (China) codes.
                  ;; If any, we had better decode them also.
                  (goto-char (point-min))
                  (re-search-forward "[\240-\377]" nil t)))
            (decode-coding-region (point-min) (point-max) 'euc-china))

        ;; "~~" -> "~"
        (goto-char (point-min))
        (while (search-forward "~~" nil t) (delete-char -1)))
      (- (point-max) (point-min)))))
106 | ||
107 | ;;;###autoload | |
(defun decode-hz-buffer ()
  "Decode the HZ/ZW encoded text of the whole accessible buffer.
This calls `decode-hz-region' on the span from `point-min' to
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (decode-hz-region start finish)))
112 | ||
113 | ;;;###autoload | |
(defun encode-hz-region (beg end)
  "Encode the text in the current region to HZ.
BEG and END delimit the region.

Every \"~\" is first doubled to \"~~\".  Then, starting at the first
character of category `c', the rest of the region is encoded with
the `iso-2022-7' coding system, and the resulting ISO-2022
designation sequences for GB2312 and ASCII are replaced by the HZ
sequences \"~{\" and \"~}\".  If the region has no such character,
only the tilde doubling is done.

Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)

      ;; "~" -> "~~"
      (goto-char (point-min))
      (while (search-forward "~" nil t) (insert ?~))

      ;; Chinese GB2312 -> "~{...~}"
      (goto-char (point-min))
      (if (re-search-forward "\\cc" nil t)
          (let ((enable-multibyte-characters nil)
                ;; The designation sequences end in a letter; match
                ;; them exactly, whatever the buffer's case folding.
                (case-fold-search nil)
                pos)
            (goto-char (setq pos (match-beginning 0)))
            (encode-coding-region pos (point-max) 'iso-2022-7)
            ;; Replace the matched text itself instead of deleting a
            ;; hard-coded number of characters.
            (goto-char pos)
            (while (search-forward iso2022-gb-designation nil t)
              (replace-match hz-gb-designnation t t))
            (goto-char pos)
            (while (search-forward iso2022-ascii-designation nil t)
              (replace-match hz-ascii-designnation t t))))
      (- (point-max) (point-min)))))
142 | ||
143 | ;;;###autoload | |
(defun encode-hz-buffer ()
  "Encode the text of the whole accessible buffer to HZ.
This calls `encode-hz-region' on the span from `point-min' to
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (encode-hz-region start finish)))
148 | ||
;; Announce this library as the feature `language/china-util'.
(provide 'language/china-util)
151 | ||
152 | ;;; Local Variables: | |
153 | ;;; generated-autoload-file: "../loaddefs.el" | |
154 | ;;; End: | |
155 | ;;; china-util.el ends here |