Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; viet-util.el --- utilities for Vietnamese -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
acaf905b | 3 | ;; Copyright (C) 1998, 2001-2012 Free Software Foundation, Inc. |
7976eda0 | 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
eaa61218 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
8f924df7 KH |
8 | ;; Copyright (C) 2003 |
9 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
10 | ;; Registration Number H13PRO009 | |
4ed46869 KH |
11 | |
12 | ;; Keywords: mule, multilingual, Vietnamese | |
13 | ||
14 | ;; This file is part of GNU Emacs. | |
15 | ||
4936186e | 16 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 17 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
18 | ;; the Free Software Foundation, either version 3 of the License, or |
19 | ;; (at your option) any later version. | |
4ed46869 KH |
20 | |
21 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
22 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
24 | ;; GNU General Public License for more details. | |
25 | ||
26 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 27 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
28 | |
29 | ;;; Commentary: | |
30 | ||
;; Vietnamese uses the ASCII characters plus 134 additional unique
;; characters (Latin letters carrying various diacritical and tone
;; marks).  As far as I know, Vietnamese now has 5 different ways of
;; representing these characters: VISCII, TCVN-5712, VPS, VIQR, and
;; Unicode.  VISCII, TCVN-5712 and VPS are simple 1-byte codes that
;; assign the 134 unique characters to the control-code area
;; (0x00..0x1F) and the right-half area (0x80..0xFF).  VIQR is a
;; mnemonic encoding specification that represents each diacritical
;; mark by an ASCII character following the base letter.
4ed46869 KH |
40 | |
41 | ;;; Code: | |
42 | ||
aade916c RS |
;; Value-less `defvar': it assigns nothing, it only declares the
;; variable as dynamically bound for the byte compiler.  The variable
;; is not referenced by any code visible in this file.
(defvar viet-viscii-nonascii-translation-table)
44 | ||
0c9470b3 KH |
;;;###autoload
(defun viet-encode-viscii-char (char)
  "Return the code point of CHAR in the `viscii' charset.
The value is whatever `encode-char' returns for CHAR and `viscii',
i.e. nil when that charset does not contain CHAR."
  (encode-char char 'viscii))
0c9470b3 | 49 | |
;; VIQR is a mnemonic encoding specification for Vietnamese.
;; It represents diacritical marks by ASCII characters as follows:
;; ------------+----------+----------------------------------
;; mark        | mnemonic | example
;; ------------+----------+----------------------------------
;; breve       | (        | a( -> a with breve (U+0103)
;; circumflex  | ^        | a^ -> a with circumflex (U+00E2)
;; horn        | +        | o+ -> o with horn (U+01A1)
;; ------------+----------+----------------------------------
;; acute       | '        | a' -> a with acute (U+00E1)
;; grave       | `        | a` -> a with grave (U+00E0)
;; hook above  | ?        | a? -> a with hook above (U+1EA3)
;; tilde       | ~        | a~ -> a with tilde (U+00E3)
;; dot below   | .        | a. -> a with dot below (U+1EA1)
;; ------------+----------+----------------------------------
;; d bar       | dd       | dd -> d with stroke (U+0111)
;; ------------+----------+----------------------------------
67 | ||
;; The Vietnamese characters below are written as `?\uXXXX' escapes, so
;; the table is plain ASCII and reads the same whatever coding system
;; the file is decoded with.  Each code point was chosen to be the
;; character that the entry's VIQR string spells out.  The number
;; trailing each entry is carried over unchanged from the previous
;; form of this table (it looks like the character's VISCII code --
;; unverified).
(defvar viet-viqr-alist
  '(;; lowercase
    (?\u1EAF . "a('")                   ; 161
    (?\u1EB1 . "a(`")                   ; 162
    (?\u1EB7 . "a(.")                   ; 163
    (?\u1EA5 . "a^'")                   ; 164
    (?\u1EA7 . "a^`")                   ; 165
    (?\u1EA9 . "a^?")                   ; 166
    (?\u1EAD . "a^.")                   ; 167
    (?\u1EBD . "e~")                    ; 168
    (?\u1EB9 . "e.")                    ; 169
    (?\u1EBF . "e^'")                   ; 170
    (?\u1EC1 . "e^`")                   ; 171
    (?\u1EC3 . "e^?")                   ; 172
    (?\u1EC5 . "e^~")                   ; 173
    (?\u1EC7 . "e^.")                   ; 174
    (?\u1ED1 . "o^'")                   ; 175
    (?\u1ED3 . "o^`")                   ; 176
    (?\u1ED5 . "o^?")                   ; 177
    (?\u1ED7 . "o^~")                   ; 178
    (?\u1ED9 . "o^.")                   ; 181
    (?\u1EDD . "o+`")                   ; 182
    (?\u1EDF . "o+?")                   ; 183
    (?\u1ECB . "i.")                    ; 184
    (?\u01A1 . "o+")                    ; 189
    (?\u1EDB . "o+'")                   ; 190
    (?\u1EB3 . "a(?")                   ; 198
    (?\u1EB5 . "a(~")                   ; 199
    (?\u1EF3 . "y`")                    ; 207
    (?\u1EE9 . "u+'")                   ; 209
    (?\u1EA1 . "a.")                    ; 213
    (?\u1EF7 . "y?")                    ; 214
    (?\u1EEB . "u+`")                   ; 215
    (?\u1EED . "u+?")                   ; 216
    (?\u1EF9 . "y~")                    ; 219
    (?\u1EF5 . "y.")                    ; 220
    (?\u1EE1 . "o+~")                   ; 222
    (?\u01B0 . "u+")                    ; 223
    (?\u00E0 . "a`")                    ; 224
    (?\u00E1 . "a'")                    ; 225
    (?\u00E2 . "a^")                    ; 226
    (?\u00E3 . "a~")                    ; 227
    (?\u1EA3 . "a?")                    ; 228
    (?\u0103 . "a(")                    ; 229
    (?\u1EEF . "u+~")                   ; 230
    (?\u1EAB . "a^~")                   ; 231
    (?\u00E8 . "e`")                    ; 232
    (?\u00E9 . "e'")                    ; 233
    (?\u00EA . "e^")                    ; 234
    (?\u1EBB . "e?")                    ; 235
    (?\u00EC . "i`")                    ; 236
    (?\u00ED . "i'")                    ; 237
    (?\u0129 . "i~")                    ; 238
    (?\u1EC9 . "i?")                    ; 239
    (?\u0111 . "dd")                    ; 240
    (?\u1EF1 . "u+.")                   ; 241
    (?\u00F2 . "o`")                    ; 242
    (?\u00F3 . "o'")                    ; 243
    (?\u00F4 . "o^")                    ; 244
    (?\u00F5 . "o~")                    ; 245
    (?\u1ECF . "o?")                    ; 246
    (?\u1ECD . "o.")                    ; 247
    (?\u1EE5 . "u.")                    ; 248
    (?\u00F9 . "u`")                    ; 249
    (?\u00FA . "u'")                    ; 250
    (?\u0169 . "u~")                    ; 251
    (?\u1EE7 . "u?")                    ; 252
    (?\u00FD . "y'")                    ; 253
    (?\u1EE3 . "o+.")                   ; 254

    ;; upper case
    (?\u1EAE . "A('")                   ; 161
    (?\u1EB0 . "A(`")                   ; 162
    (?\u1EB6 . "A(.")                   ; 163
    (?\u1EA4 . "A^'")                   ; 164
    (?\u1EA6 . "A^`")                   ; 165
    (?\u1EA8 . "A^?")                   ; 166
    (?\u1EAC . "A^.")                   ; 167
    (?\u1EBC . "E~")                    ; 168
    (?\u1EB8 . "E.")                    ; 169
    (?\u1EBE . "E^'")                   ; 170
    (?\u1EC0 . "E^`")                   ; 171
    (?\u1EC2 . "E^?")                   ; 172
    (?\u1EC4 . "E^~")                   ; 173
    (?\u1EC6 . "E^.")                   ; 174
    (?\u1ED0 . "O^'")                   ; 175
    (?\u1ED2 . "O^`")                   ; 176
    (?\u1ED4 . "O^?")                   ; 177
    (?\u1ED6 . "O^~")                   ; 178
    (?\u1ED8 . "O^.")                   ; 181
    (?\u1EDC . "O+`")                   ; 182
    (?\u1EDE . "O+?")                   ; 183
    (?\u1ECA . "I.")                    ; 184
    (?\u01A0 . "O+")                    ; 189
    (?\u1EDA . "O+'")                   ; 190
    (?\u1EB2 . "A(?")                   ; 198
    (?\u1EB4 . "A(~")                   ; 199
    (?\u1EF2 . "Y`")                    ; 207
    (?\u1EE8 . "U+'")                   ; 209
    (?\u1EA0 . "A.")                    ; 213
    (?\u1EF6 . "Y?")                    ; 214
    (?\u1EEA . "U+`")                   ; 215
    (?\u1EEC . "U+?")                   ; 216
    (?\u1EF8 . "Y~")                    ; 219
    (?\u1EF4 . "Y.")                    ; 220
    (?\u1EE0 . "O+~")                   ; 222
    (?\u01AF . "U+")                    ; 223
    (?\u00C0 . "A`")                    ; 224
    (?\u00C1 . "A'")                    ; 225
    (?\u00C2 . "A^")                    ; 226
    (?\u00C3 . "A~")                    ; 227
    (?\u1EA2 . "A?")                    ; 228
    (?\u0102 . "A(")                    ; 229
    (?\u1EEE . "U+~")                   ; 230
    (?\u1EAA . "A^~")                   ; 231
    (?\u00C8 . "E`")                    ; 232
    (?\u00C9 . "E'")                    ; 233
    (?\u00CA . "E^")                    ; 234
    (?\u1EBA . "E?")                    ; 235
    (?\u00CC . "I`")                    ; 236
    (?\u00CD . "I'")                    ; 237
    (?\u0128 . "I~")                    ; 238
    (?\u1EC8 . "I?")                    ; 239
    ;; Three spellings of capital D with stroke: all of them decode to
    ;; it, while encoding always emits the first one ("DD").
    (?\u0110 . "DD")                    ; 240
    (?\u0110 . "dD")                    ; 240
    (?\u0110 . "Dd")                    ; 240
    (?\u1EF0 . "U+.")                   ; 241
    (?\u00D2 . "O`")                    ; 242
    (?\u00D3 . "O'")                    ; 243
    (?\u00D4 . "O^")                    ; 244
    (?\u00D5 . "O~")                    ; 245
    (?\u1ECE . "O?")                    ; 246
    (?\u1ECC . "O.")                    ; 247
    (?\u1EE4 . "U.")                    ; 248
    (?\u00D9 . "U`")                    ; 249
    (?\u00DA . "U'")                    ; 250
    (?\u0168 . "U~")                    ; 251
    (?\u1EE6 . "U?")                    ; 252
    (?\u00DD . "Y'")                    ; 253
    (?\u1EE2 . "O+.")                   ; 254

    ;; escape from composition
    (?\( . "\\(")                       ; breve (left parenthesis)
    (?^ . "\\^")                        ; circumflex (caret)
    (?+ . "\\+")                        ; horn (plus sign)
    (?' . "\\'")                        ; acute (apostrophe)
    (?` . "\\`")                        ; grave (backquote)
    (?? . "\\?")                        ; hook above (question mark)
    (?~ . "\\~")                        ; tilde (tilde)
    (?. . "\\.")                        ; dot below (period)
    (?d . "\\d")                        ; d-bar (d)
    (?\\ . "\\\\")                      ; literal backslash
    )
  "Alist of Vietnamese characters vs corresponding `VIQR' string.
Each element has the form (CHAR . VIQR).  `viet-encode-viqr-region'
looks entries up by CHAR with `assq', and `viet-decode-viqr-region'
looks them up by VIQR string with `rassoc', so when a character
appears more than once only its first entry is used for encoding.
The last group of entries maps the ASCII characters that serve as
mnemonics, and backslash itself, to backslash-escaped strings.")
222 | ||
(defconst viqr-regexp
  "[aeiouyAEIOUY]\\([(^+]?['`?~.]\\|[(^+]\\)\\|[Dd][Dd]"
  "Regular expression matching one Vietnamese character written in `VIQR'.
A match is either a vowel followed by a tone mnemonic (optionally
preceded by a breve, circumflex or horn mnemonic) or by a breve,
circumflex or horn mnemonic alone, or else the letter \"d\" doubled,
each \"d\" in either case.")
227 | ||
;;;###autoload
(defun viet-decode-viqr-region (from to)
  "Replace `VIQR' mnemonics between FROM and TO by Vietnamese characters.
Interactively, operate on the region.  From Lisp, FROM and TO are
positions (integers or markers) delimiting the text to convert.
Text that matches `viqr-regexp' but has no entry in `viet-viqr-alist'
is left unchanged."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward viqr-regexp nil t)
      (let* ((start (match-beginning 0))
             (end (match-end 0))
             ;; The table is keyed by character, so search it by value.
             (entry (rassoc (buffer-substring start end) viet-viqr-alist)))
        ;; The regexp is looser than the table: e.g. "i^" matches it
        ;; but is not a Vietnamese letter, so there is no entry for it.
        (when entry
          (delete-region start end)
          (insert (car entry)))))))
244 | ||
;;;###autoload
(defun viet-decode-viqr-buffer ()
  "Replace `VIQR' mnemonics in the current buffer by Vietnamese characters.
The conversion covers the text from `point-min' to `point-max'."
  (interactive)
  (let ((start (point-min))
        (end (point-max)))
    (viet-decode-viqr-region start end)))
250 | ||
;;;###autoload
(defun viet-encode-viqr-region (from to)
  "Replace Vietnamese characters between FROM and TO by `VIQR' mnemonics.
Interactively, operate on the region.  From Lisp, FROM and TO are
positions (integers or markers) delimiting the text to convert.
Characters of category `v' that have no entry in `viet-viqr-alist'
are left unchanged."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; "\\cv" matches one character of category `v'; after a
    ;; successful search that character is the one just before point.
    (while (re-search-forward "\\cv" nil t)
      (let ((mnemonic (cdr (assq (preceding-char) viet-viqr-alist))))
        (when mnemonic
          (delete-char -1)
          (insert mnemonic))))))
267 | ||
;;;###autoload
(defun viet-encode-viqr-buffer ()
  "Replace Vietnamese characters in the current buffer by `VIQR' mnemonics.
The conversion covers the text from `point-min' to `point-max'."
  (interactive)
  (let ((start (point-min))
        (end (point-max)))
    (viet-encode-viqr-region start end)))
273 | ||
06f84575 KH |
;;;###autoload
(defun viqr-post-read-conversion (len)
  "Decode `VIQR' mnemonics in the LEN characters following point.
Point and the buffer's modified flag are left as they were found.
Return the length of that stretch of text after decoding."
  (save-excursion
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((was-modified (buffer-modified-p)))
        (viet-decode-viqr-region (point-min) (point-max))
        ;; Decoding edits the buffer; put the modified flag back.
        (set-buffer-modified-p was-modified)
        ;; The narrowing now spans exactly the decoded text.
        (- (point-max) (point-min))))))
283 | ||
;;;###autoload
(defun viqr-pre-write-conversion (from to)
  "Encode text as `VIQR' in a newly generated work buffer.
If FROM is a string, it is the text to encode and TO is ignored.
Otherwise FROM and TO are positions delimiting the text in the
current buffer.  The text is copied into a new buffer named
\" *temp*\", Vietnamese characters there are replaced by their `VIQR'
mnemonics, and that buffer is left current on return.
Always return nil."
  (let ((source-buffer (current-buffer)))
    ;; NOTE(review): the new buffer stays current and is not killed
    ;; here; presumably the caller picks the encoded text up from it.
    ;; Confirm before replacing this with `with-temp-buffer'.
    (set-buffer (generate-new-buffer " *temp*"))
    (if (stringp from)
        (insert from)
      (insert-buffer-substring source-buffer from to))
    (viet-encode-viqr-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))
06f84575 | 294 | |
4ed46869 | 295 | ;;; |
650e8505 | 296 | (provide 'viet-util) |
4ed46869 | 297 | |
4ed46869 | 298 | ;;; viet-util.el ends here |