Commit | Line | Data |
---|---|---|
3fdc9c8f | 1 | ;;; china-util.el --- utilities for Chinese |
4ed46869 | 2 | |
4ed46869 | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
fa526c4a | 4 | ;; Licensed to the Free Software Foundation. |
4ed46869 KH |
5 | |
6 | ;; Keywords: mule, multilingual, Chinese | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 | 24 | |
60370d40 PJ |
25 | ;;; Commentary: |
26 | ||
4ed46869 KH |
27 | ;;; Code: |
28 | ||
29 | ;; HZ/ZW encoding stuff | |
30 | ||
31 | ;; HZ is an encoding method for the Chinese character set GB2312 that | |
32 | ;; is widely used on the Internet. It is very similar to the 7-bit | |
33 | ;; environment of ISO-2022. The difference is that HZ uses the | |
34 | ;; sequences "~{" and "~}" to designate GB2312 and ASCII respectively; | |
35 | ;; hence, it doesn't use the ESC (0x1B) code. | |
36 | ||
37 | ;; ZW is another encoding method for the Chinese character set GB2312. | |
38 | ;; It encodes Chinese characters line by line, starting each such line | |
39 | ;; with the sequence "zW". Like HZ, it uses only 7-bit codes. | |
40 | ||
(defvar iso2022-gb-designation "\e$A"
  "ISO-2022 escape sequence to designate GB2312.")

(defvar hz-gb-designnation "~{"
  "HZ escape sequence to designate GB2312.")

(defvar iso2022-ascii-designation "\e(B"
  "ISO-2022 escape sequence to designate ASCII.")

(defvar hz-ascii-designnation "~}"
  "HZ escape sequence to designate ASCII.")

(defvar zw-start-gb "^zW"
  "Regexp of ZW sequence to start GB2312.")

;; Three alternatives joined by the regexp `or' operator: the HZ
;; designation of GB2312, the ZW line prefix, and any character
;; outside the 7-bit range 0..127.
(defvar hz/zw-start-gb
  (mapconcat 'identity
             (list hz-gb-designnation zw-start-gb "[^\0-\177]")
             "\\|")
  "Regexp for start of GB2312 in an encoding mixture of HZ and ZW.")

(defvar decode-hz-line-continuation nil
  "Flag telling whether to honor the line continuation convention of HZ.
When non-nil, `decode-hz-region' looks for the sequence that ends
GB2312 text beyond the end of the line on which the text started.")
57 | ||
8f3969f8 KH |
(defconst hz-set-msb-table
  (let ((table (make-string 127 0))
        (code 0))
    ;; Codes 0..32 map to themselves; codes 33..126 map to the same
    ;; code plus 128, i.e. with the most significant bit set.
    (while (< code 127)
      (aset table code (if (< code 33) code (+ code 128)))
      (setq code (1+ code)))
    table)
  "Translation table, indexed by character code 0..126.
Codes below 33 are left unchanged; codes 33..126 are mapped to
the code plus 128.")
68 | ||
4ed46869 KH |
;;;###autoload
(defun decode-hz-region (beg end)
  "Decode HZ/ZW encoded text in the current region.
The text between BEG and END is first rewritten in place through
`hz-set-msb-table' and then decoded with the `euc-china' coding
system.  Text outside the region is not touched.
Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (let (pos ch)
        (narrow-to-region beg end)

        ;; We, at first, convert HZ/ZW to `euc-china',
        ;; then decode it.

        ;; "~\n" -> "\n", "~~" -> "~"
        ;;
        ;; For "~~" the SECOND tilde is deleted, so that point stays
        ;; after the tilde we keep.  Deleting the first one instead
        ;; would leave point in front of the survivor, which the next
        ;; search would then take for the start of another escape
        ;; (turning "~~~~" into "~" rather than "~~").
        ;;
        ;; NOTE: a literal "~{" written as "~~{" becomes "~{" here and
        ;; is still taken for a GB2312 designation by the loop below.
        (goto-char (point-min))
        (while (search-forward "~" nil t)
          (setq ch (following-char))
          (cond ((= ch ?~)
                 (delete-char 1))
                ((= ch ?\n)
                 (delete-char -1))))

        ;; "^zW...\n" -> Chinese GB2312
        ;; "~{...~}"  -> Chinese GB2312
        (goto-char (point-min))
        ;; From here on BEG is reused as "first position that needs
        ;; decoding" and END as "end of the last converted stretch".
        (setq beg nil)
        (while (re-search-forward hz/zw-start-gb nil t)
          (setq pos (match-beginning 0)
                ch (char-after pos))
          ;; Record the first position to start conversion.
          (or beg (setq beg pos))
          (end-of-line)
          (setq end (point))
          (if (>= ch 128)               ; 8bit GB2312
              ;; Nothing to rewrite; point is already at end of line.
              nil
            ;; Remove the two-character introducer ("~{" or "zW").
            (goto-char pos)
            (delete-char 2)
            (setq end (- end 2))
            (if (= ch ?z)               ; ZW -> euc-china
                ;; ZW text runs to the end of the line.
                (progn
                  (translate-region (point) end hz-set-msb-table)
                  (goto-char end))
              ;; HZ text runs up to "~}", searched for within this line
              ;; only unless `decode-hz-line-continuation' is non-nil.
              (if (search-forward hz-ascii-designnation
                                  (if decode-hz-line-continuation nil end)
                                  t)
                  (delete-char -2))
              (setq end (point))
              (translate-region pos (point) hz-set-msb-table))))
        (if beg
            (decode-coding-region beg end 'euc-china)))
      (- (point-max) (point-min)))))
117 | ||
;;;###autoload
(defun decode-hz-buffer ()
  "Decode HZ/ZW encoded text in the current buffer.
This calls `decode-hz-region' on the span from `point-min' to
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (decode-hz-region start finish)))
123 | ||
;;;###autoload
(defun encode-hz-region (beg end)
  "Encode the text in the current region to HZ.
Only the text between BEG and END is modified.
Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)

      ;; Escape every literal tilde by doubling it: "~" -> "~~".
      (goto-char (point-min))
      (while (search-forward "~" nil t)
        (insert ?~))

      ;; Chinese GB2312 -> "~{...~}"
      ;; Everything from the first Chinese character to the end of the
      ;; region is encoded as `iso-2022-7bit'; its designation
      ;; sequences are then swapped for the HZ ones.
      (goto-char (point-min))
      (if (re-search-forward "\\cc" nil t)
          (let ((start (match-beginning 0)))
            (goto-char start)
            (encode-coding-region start (point-max) 'iso-2022-7bit)
            ;; ISO-2022 "designate GB2312" -> HZ "~{"
            (goto-char start)
            (while (search-forward iso2022-gb-designation nil t)
              (replace-match hz-gb-designnation t t))
            ;; ISO-2022 "designate ASCII" -> HZ "~}"
            (goto-char start)
            (while (search-forward iso2022-ascii-designation nil t)
              (replace-match hz-ascii-designnation t t))))
      (- (point-max) (point-min)))))
152 | ||
;;;###autoload
(defun encode-hz-buffer ()
  "Encode the text in the current buffer to HZ.
This calls `encode-hz-region' on the span from `point-min' to
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (encode-hz-region start finish)))
158 | ||
159 | ;; | |
650e8505 | 160 | (provide 'china-util) |
4ed46869 | 161 | |
4ed46869 | 162 | ;;; china-util.el ends here |