Commit | Line | Data |
---|---|---|
49ed466f | 1 | ;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package |
4ed46869 | 2 | |
4ed46869 | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
fa526c4a | 4 | ;; Licensed to the Free Software Foundation. |
4ed46869 KH |
5 | |
6 | ;; Keywords: Quail, TIT, cxterm | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
24 | |
25 | ;;; Commentary: | |
26 | ||
49ed466f | 27 | ;; Convert cxterm dictionary (of TIT format) to quail-package. |
4ed46869 KH |
28 | ;; |
29 | ;; Usage (within Emacs): | |
49ed466f | 30 | ;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR> |
4ed46869 | 31 | ;; Usage (from shell): |
49ed466f | 32 | ;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\ |
4ed46869 KH |
33 | ;; [-dir DIR] [DIR | FILE] ... |
34 | ;; | |
35 | ;; When you run titdic-convert within Emacs, you have a chance to | |
36 | ;; modify arguments of `quail-define-package' before saving the | |
37 | ;; converted file. For instance, you are likely to modify TITLE, | |
38 | ;; DOCSTRING, and KEY-BINDINGS. | |
39 | ||
49ed466f | 40 | ;; Cxterm dictionary file (*.tit) is a line-oriented text (English, |
4ed46869 KH |
41 | ;; Chinese, Japanese, and Korean) file. The whole file consists of |
42 | ;; two parts, the definition part (`header' hereafter) followed by | |
43 | ;; the dictionary part (`body' hereafter). All lines beginning with | |
44 | ;; '#' are ignored. | |
45 | ;; | |
46 | ;; Each line in the header part has two fields, KEY and VALUE. These | |
47 | ;; fields are separated by one or more white characters. | |
48 | ;; | |
49 | ;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS. | |
50 | ;; These fields are separated by one or more white characters. | |
51 | ;; | |
52 | ;; See the manual page of `tit2cit' of cxterm distribution for more | |
53 | ;; detail. | |
54 | ||
55 | ;;; Code: | |
56 | ||
57 | (require 'quail) | |
58 | ||
49ed466f | 59 | ;; List of values of key "ENCODE:" and the corresponding Emacs |
4ed46869 KH |
60 | ;; coding-system and language environment name. |
(defvar tit-encode-list
  (list (list "GB" 'euc-china "Chinese-GB")
        (list "BIG5" 'cn-big5 "Chinese-BIG5")
        (list "JIS" 'euc-japan "Japanese")
        (list "KS" 'euc-kr "Korean"))
  "Alist describing the values accepted for the TIT key \"ENCODE:\".
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE).  ENCODE is
the string found in the dictionary header, CODING-SYSTEM is the Emacs
coding system used to decode the dictionary, and LANGUAGE is the
language environment name written into the generated Quail package.")
66 | ||
4558e816 KH |
67 | ;; Alist of input method names and the corresponding title and extra |
68 | ;; docstring. For each input method generated from a TIT dictionary, | |
69 | ;; a docstring is automatically generated from the comments in the | |
70 | ;; dictionary. The extra docstring in this alist adds more | |
71 | ;; information. | |
72 | ;; The command describe-input-method shows the automatically generated | |
73 | ;; docstring, then the extra docstring, replacing each form \<VAR> | |
74 | ;; by the value of variable VAR. For instance, the form | |
75 | ;; \<quail-translation-docstring> is replaced by a description about | |
76 | ;; how to select a translation from a list of candidates. | |
77 | ||
6b1e079c KH |
;; Each element has the form (NAME TITLE [EXTRA-DOCSTRING]).  NAME is
;; the input method name ("chinese-" followed by the downcased TIT file
;; name), TITLE is used as the TITLE argument of the generated
;; `quail-define-package' call, and EXTRA-DOCSTRING, if present, is
;; appended to the docstring generated from the dictionary comments.
;;
;; Inside EXTRA-DOCSTRING the substitution form must be written with a
;; doubled backslash (\\<VAR>): a single backslash before `<' is
;; dropped by the Lisp reader, so the form would not survive in the
;; string.
(defvar quail-cxterm-package-ext-info
  '(("chinese-4corner" "\e$(0(?-F\e(B")
    ("chinese-array30" "\e$(0#R#O\e(B")
    ("chinese-ccdospy" "\e$AKuF4\e(B"
     "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312').

Pinyin is the standared roman transliteration method for Chinese.
For the detail of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'. The
difference is that you type a single key for these Pinyin spelling.
Pinyin: zh en eng ang ch an ao ai ong sh ing yu(\e$A(9\e(B)
keyseq: a f g h i j k l s u y v
For expample:
Chinese: \e$A0!\e(B \e$A9{\e(B \e$AVP\e(B \e$AND\e(B \e$A9b\e(B \e$ASq\e(B \e$AH+\e(B
Pinyin: a guo zhong wen guang yu quan
Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6

\\<quail-translation-docstring>

For double-width GB2312 characters correponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-ctlau" "\e$AAuTA\e(B")

    ("chinese-ctlaub" "\e$(0N,Gn\e(B")

    ("chinese-ecdict" "\e$(05CKH\e(B"
     "In this input method, you enter a Chinese (Big5) charactere or word
by typing the corresponding English word. For example, if you type
\"computer\", \"\e$(0IZH+\e(B\" is input.

\\<quail-translation-docstring>")

    ("chinese-etzy" "\e$(06/0D\e(B"
     "Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of phonetic symbol. One to three Zhuyin symbols
compose one Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 1, 2, 3, or 4 specifing a tone (SPC:\e$(0?v(N\e(B, 1:\e$(0M=Vy\e(B, 2:\e$(0Dm(N\e(B, 3: \e$(0&9Vy\e(B,
4:\e$(0(+Vy\e(B).

\\<quail-translation-docstring>")

    ("chinese-punct-b5" "\e$(0O:\e(BB"
     "Input method for Chinese punctuations and symbols of Big5
\(`chinese-big5-1' and `chinese-big5-2').")

    ("chinese-punct" "\e$A1j\e(BG"
     "Input method for Chinese punctuations and symbols of GB2312
\(`chinese-gb2312').")

    ("chinese-py-b5" "\e$(03<\e(BB"
     "Pinyin base input method for Chinese Big5 characters
\(`chinese-big5-1', `chinese-big5-2').

This input method works almost the same way as `chinese-py' (which
see).

This input method supports only Han characters. The more convenient
method is `chinese-py-punct-b5', which is the combination of this
method and `chinese-punct-b5' and which supports both Han characters
and punctuation/symbols.

For double-width Big5 characters corresponding to ASCII, use the input
method `chinese-qj-b5'.

The input method `chinese-py' and `chinese-tonepy' are also Pinyin
based, but for the character set GB2312 (`chinese-gb2312').")

    ("chinese-py" "\e$AF4\e(BG"
     "Pinyin base input method for Chinese charset GB2312
\(`chinese-gb2312').

Pinyin is the standared roman transliteration method for Chinese.
Pinyin uses a sequence of Latin alphabetic characters for each Chinese
character. The sequence is made by the combination of the initials
\(the beginning sounds) and finals \(the ending sounds).

initials: b p m f d t n l z c s zh ch sh r j q x g k h
finals: a o e i er ai ei oa ou an en ang eng ong i ia iao ie iu ian in
iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun

(Note: In the correct Pinyin writing, the sequence \"yu\" in the last
four finals should be written by the character u-umlaut `\e$A(9\e(B'.)

With this input method, you enter a Chinese character by first
entering its pinyin spelling.

\\<quail-translation-docstring>

For instance, to input \e$ADc\e(B, you type \"n i C-n 3\". The first \"n i\"
is a Pinyin, \"C-n\" selects the next group of candidates (each group
contains at most 10 characters), \"3\" select the third character in
that group.

This input method supports only Han characters. The related input
method which `chinese-py-punct' is the combination of this method and
`chinese-punct'; it supports both Han characters and punctuation
characters.

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.

The correct Pinyin system specifies tones by diacritical marks, but
this input method doesn't use them, which results in easy (you don't
have to know the exact tones), but verbose (many characters are assigned
to the same key sequence) input. You may also want to try the input
method `chinese-tonepy' with which you must specify tones by digits
\(1..5).")

    ("chinese-qj-b5" "\e$(0)A\e(BB")

    ("chinese-qj" "\e$AH+\e(BG")

    ("chinese-sw" "\e$AJWN2\e(B"
     "Radical base input method for Chinese charset GB2312 (`chinese-gb2312').

In this input method, you enter a Chinese character byte typing two
keys. characters. The first key corresponds to the first (\e$AJW\e(B)
radical, the second key corresponds to the last (\e$AN2\e(B) radical. The
correspondance of keys and radicals are as below:

first radical:
a b c d e f g h i j k l m n o p q r s t u v w x y z
\e$APD\e(B \e$AZ"\e(B \e$AJ,\e(B \e$AX<\e(B \e$A;p\e(B \e$A?Z\e(B \e$A^P\e(B \e$Ac_\e(B \e$AZ%\e(B \e$A\3\e(B \e$AXi\e(B \e$AD>\e(B \e$Alj\e(B \e$Ab;\e(B \e$ATB\e(B \e$Afy\e(B \e$AJ/\e(B \e$AMu\e(B \e$A0K\e(B \e$AX/\e(B \e$AHU\e(B \e$AeA\e(B \e$Aak\e(B \e$AVq\e(B \e$AR;\e(B \e$AHK\e(B
last radical:
a b c d e f g h i j k l m n o p q r s t u v w x y z
\e$ASV\e(B \e$AI=\e(B \e$AMA\e(B \e$A56\e(B \e$AZb\e(B \e$A?Z\e(B \e$ARB\e(B \e$Aqb\e(B \e$A4s\e(B \e$A6!\e(B \e$A[L\e(B \e$Ala\e(B \e$AJ.\e(B \e$A4u\e(B \e$AXg\e(B \e$ACE\e(B \e$A=q\e(B \e$AX-\e(B \e$AE.\e(B \e$ARR\e(B \e$A`m\e(B \e$AP!\e(B \e$A3'\e(B \e$A3f\e(B \e$A_.\e(B \e$A27\e(B

\\<quail-translation-docstring>")

    ("chinese-tonepy" "\e$A5wF4\e(B"
     "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').

Pinyin is the standared roman transliteration method for Chinese.
For the detail of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'. The
difference is that you must type 1..5 after each Pinyin spelling to
specify a tone (1:\e$ARuF=\e(B, 2:\e$AQtF=\e(B, 3:\e$AIOIy\e(B, 4\e$AOBIy\e(B, 5:\e$AGaIy\e(B).

\\<quail-translation-docstring>

For instance, to input \e$ADc\e(B, you type \"n i 3 3\", the first \"n i\" is
a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
the third character from the candidate list.

For double-width GB2312 characters correponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-ziranma" "\e$AK+F4\e(B"
     "Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').

Pinyin is the standared roman transliteration method for Chinese.
For the detail of Pinyin system, see the documentation of the input
method `chinese-py'.

In this input method, unlike the standard spelling of Pinyin, all
initials and finals are assigned to single keys (see the above table).
For instance, the initial \"ch\" is assigned to the key `i', the final
\"iu\" is assigned to the key `q'. And tones 1, 2, 3, 4, and \e$AGaIy\e(B are
assigned to the keys `q', `w', `e', `r', `t' respectively.

\\<quail-translation-docstring>

To input one letter Chinese words, you type 4 keys, the first two for
the Pinyin of the letter, next one for tone, and the last one is
always quote ('). For instance, \"vsq'1\" input \e$AVP\e(B. Exceptions are
these letters. You can input them just by typing a single key.

Character: \e$A04\e(B \e$A2;\e(B \e$A4N\e(B \e$A5D\e(B \e$A6~\e(B \e$A7"\e(B \e$A8v\e(B \e$A:M\e(B \e$A3v\e(B \e$A<0\e(B \e$A?I\e(B \e$AAK\e(B \e$AC;\e(B
Key: a b c d e f g h i j k l m
Character: \e$ADc\e(B \e$AE7\e(B \e$AF,\e(B \e$AF_\e(B \e$AHK\e(B \e$AH}\e(B \e$AK{\e(B \e$AJG\e(B \e$AWE\e(B \e$ANR\e(B \e$AP!\e(B \e$AR;\e(B \e$ATZ\e(B
Key: n o p q r s t u v w x y z

To input two letter words, you have two ways. One way is to type 4
keys, two for the first Pinyin, two for the second Pinyin. For
instance, \"vsgo\" input \e$AVP9z\e(B. Another is to type 3 keys\; initials
of two letters, and quote ('). For instance, \"vg'\" also input \e$AVP9z\e(B.

To input three letter words, you type 4 keys\; initials of three
letters, and the last is quote ('). For instance, \"bjy'2\" input \e$A11\e(B
\e$A>)Q<\e(B (the last `2' is to select one from candidates).

To input words of more than three letters, you type 4 keys, initials
of the first three letters and the last letter. For instance,
\"bjdt\" input \e$A11>)5gJSL(\e(B.

To input symbols and punctuations, type `/' followed by one of `a' to
`z', then select one from candidates.

")

    ("chinese-zozy" "\e$(0I\0D\e(B"
     "Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of phonetic symbol. One to three Zhuyin symbols
compose a Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 6, 3, 4, or 7 specifing a tone (SPC:\e$(0?v(N\e(B, 6:\e$(0Dm(N\e(B, 3:\e$(0&9Vy\e(B, 4:\e$(0(+Vy\e(B,
7:\e$(0M=Vy\e(B).

\\<quail-translation-docstring>")))
4ed46869 KH |
291 | |
;; Read the value field that starts at point on the current line.
;; The run of characters up to the next space, tab or newline is
;; parsed as the contents of a Lisp string, so backslash escapes such
;; as "\010" are turned into the characters they denote.  Return nil
;; when no such run starts at point.
(defsubst tit-read-key-value ()
  (when (looking-at "[^ \t\n]+")
    (let ((literal (format "\"%s\"" (match-string 0))))
      (car (read-from-string literal)))))
296 | ||
;; Build the name of the Quail package file generated from the TIT
;; dictionary FILENAME: drop the last four characters (the ".tit"
;; extension) and the directory part, append ".el", and expand the
;; result relative to DIRNAME (as `expand-file-name' does when DIRNAME
;; is nil).  For instance, ".../ZOZY.tit" -> "ZOZY.el".
(defun tit-make-quail-package-file-name (filename &optional dirname)
  (let* ((stem (substring filename 0 -4))
         (base (file-name-nondirectory stem)))
    (expand-file-name (concat base ".el") dirname)))
303 | ||
1375754c KH |
;; Non-nil while a character dictionary (BEGINDICTIONARY) is being
;; converted, nil while a phrase dictionary (BEGINPHRASE) is being
;; converted.  The header parser assigns this with `setq', so it is
;; declared with `defvar' rather than `defconst'.
(defvar tit-dictionary t)
;; Value of the key "ENCODE:" of the dictionary being converted.
(defvar tit-encode nil)
;; Encoding assumed for a dictionary that has no "ENCODE:" key.
(defvar tit-default-encode "GB")
308 | ||
;; Print, to `standard-output', the elements of the KEY-BINDINGS arg
;; of `quail-define-package' that make each character in the string
;; KEYS invoke FUNCTION-SYMBOL.  Each element is printed in the form
;; ("KEY" . FUNCTION-SYMBOL); successive elements are separated by a
;; newline and a space.
;;
;; A control character is emitted (as "\C-x") only when
;; `quail-translation-keymap' binds it to
;; `quail-execute-non-quail-command'; characters above 127 are
;; skipped.  Printable characters are written with `prin1-to-string'
;; so that `"' and `\' come out as readable string literals.
(defun tit-generate-key-bindings (keys function-symbol)
  (let ((len (length keys))
        (i 0)
        (first t)
        key)
    (while (< i len)
      (setq key (aref keys i))
      (if (if (< key ?\ )
              (eq (lookup-key quail-translation-keymap
                              (char-to-string key))
                  'quail-execute-non-quail-command)
            (<= key 127))
          (progn
            ;; Emit the separator only in front of a binding that is
            ;; really printed, so skipped keys leave no blank lines.
            (or first (princ "\n "))
            (princ (cons (cond ((< key ?\ )
                                (format "\"\\C-%c\"" (+ key ?@)))
                               ((< key 127)
                                (prin1-to-string (char-to-string key)))
                               (t "\"\\C-?\""))
                         function-symbol))
            (setq first nil)))
      (setq i (1+ i)))))
331 | ||
;; Analyze the header part of a TIT dictionary and print an appropriate
;; `quail-define-package' call to `standard-output'.
;;
;; The header is read from the accessible portion of the current
;; buffer, from `point-min' to the end.  Every header line is copied to
;; the output as a comment while the values of the keys COMMENT,
;; MULTICHOICE, MOVERIGHT, MOVELEFT, PROMPT, BACKSPACE and KEYPROMPT
;; are collected.  BEGINDICTIONARY / BEGINPHRASE set the global
;; variable `tit-dictionary'.  FILENAME is the TIT dictionary file
;; name; the package name is "chinese-" followed by its downcased
;; base name without the last four characters.
(defun tit-process-header (filename)
  (message "Processing header part...")
  (goto-char (point-min))

  ;; At first, generate header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
                  "chinese-"
                  (substring (downcase (file-name-nondirectory filename))
                             0 -4)))
        ;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package)
    (princ "' generated by the command `titdic-convert'\n;;\tDate: ")
    (princ (current-time-string))
    (princ "\n;;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n;;; Comment:\n\n")
    (princ ";; Byte-compile this file again after any modification.\n\n")
    (princ ";;; Start of the header of original TIT dictionary.\n\n")

    (while (not (eobp))
      (let ((ch (following-char))
            (pos (point)))
        ;; Dispatch on the first character of the line before trying
        ;; the individual keywords.
        (cond ((= ch ?C)                ; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((start (match-end 0)))
                        (end-of-line)
                        (setq tit-comments
                              (cons (buffer-substring start (point))
                                    tit-comments))))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      ;; A PROMPT line without a value yields nil; fall
                      ;; back to the empty prompt so that the code
                      ;; below always deals with a string.
                      (setq tit-prompt (or (tit-read-key-value) ""))
                      ;; Some TIT dictionaries that are encoded by
                      ;; euc-china contain an invalid character at the
                      ;; tail.
                      (if (> (length tit-prompt) 0)
                          (let* ((last (aref tit-prompt
                                             (1- (length tit-prompt))))
                                 (split (split-char last)))
                            (if (or (eq (nth 1 split) 32)
                                    (eq (nth 2 split) 32))
                                (setq tit-prompt
                                      (substring tit-prompt 0 -1))))))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-dictionary t))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-dictionary nil))))
              ((= ch ?K)                ; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; The key may be written as a backslash
                        ;; followed by digits; let the Lisp reader
                        ;; decode it.
                        (if (string-match "\\\\[0-9]+" key-char)
                            (setq key-char
                                  (car (read-from-string (format "\"%s\""
                                                                 key-char)))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt)))))))
        ;; Copy the original header line into the output as a comment.
        (end-of-line)
        (princ ";; ")
        (princ (buffer-substring pos (point)))
        (princ "\n")
        (forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (nth 1 (assoc package quail-cxterm-package-ext-info))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      (princ (or title
                 (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
                     (substring tit-prompt (match-beginning 1) (match-end 1))
                   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    (if tit-keyprompt
        (progn
          (princ " '(")
          (while tit-keyprompt
            (princ " ")
            (princ (format "(%d . \"%s\")\n"
                           (string-to-char (car (car tit-keyprompt)))
                           (cdr (car tit-keyprompt))))
            (setq tit-keyprompt (cdr tit-keyprompt)))
          (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (let ((doc (concat tit-prompt "\n"))
          (comments (if tit-comments
                        (mapconcat 'identity (nreverse tit-comments) "\n")))
          (doc-ext (nth 2 (assoc package quail-cxterm-package-ext-info))))
      (if comments
          (setq doc (concat doc "\n" comments "\n")))
      (if doc-ext
          (setq doc (concat doc "\n" doc-ext "\n")))
      (prin1 doc)
      (terpri))

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    ;; NOTE(review): the GUIDANCE loop above has already consumed
    ;; `tit-keyprompt', so it is always nil here and " nil nil)" is
    ;; always printed.  Kept as is so that generated packages do not
    ;; change; confirm whether " t t)" was intended for dictionaries
    ;; that define KEYPROMPT.
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
478 | ||
;; Print one translation rule, the list (KEY TRANSLATIONS), followed
;; by a newline.  Every backslash followed by three digits in KEY is
;; first replaced by the character that the Lisp reader produces for
;; that escape.  When `tit-dictionary' is nil, TRANSLATIONS is a list
;; collected in reverse order; it is reversed and printed as a vector.
(defsubst tit-flush-translations (key translations)
  (let ((escape "\\\\[0-9][0-9][0-9]")
        (start 0)
        (pieces nil)
        from to)
    ;; Collect, in reverse order, the literal stretches of KEY and the
    ;; decoded escapes between them.
    (while (string-match escape key start)
      (setq from (match-beginning 0)
            to (match-end 0))
      (setq pieces
            (cons (car (read-from-string
                        (concat "\"" (substring key from to) "\"")))
                  (cons (substring key start from) pieces)))
      (setq start to))
    ;; PIECES is nil when KEY has no escape; KEY is then left alone.
    (if pieces
        (setq key (apply 'concat
                         (nreverse (cons (substring key start) pieces))))))
  (prin1 (list key (if tit-dictionary
                       translations
                     (vconcat (nreverse translations)))))
  (princ "\n"))
4ed46869 KH |
496 | |
;; Convert the body part of a TIT dictionary, from point to the end of
;; the buffer, into a `quail-define-rules' call printed to
;; `standard-output'.
;;
;; Lines that start with `#' and empty lines are skipped, and so are
;; entries whose key is followed by no translation.  Consecutive lines
;; with the same key are merged into a single rule.  When
;; `tit-dictionary' is non-nil the translations of a key are
;; concatenated into one string; otherwise each whitespace-separated
;; phrase is collected separately.
(defun tit-process-body ()
  (message "Formatting translation rules...")
  (let ((prev-key "")
        (translations nil)
        key start)
    (princ "(quail-define-rules\n")
    (while (not (eobp))
      (cond
       ;; A comment line or an empty line.
       ((memq (following-char) '(?# ?\n))
        (forward-line 1))
       (t
        (setq start (point))
        (skip-chars-forward "^ \t\n")
        (setq key (buffer-substring start (point)))
        (skip-chars-forward " \t")
        (if (memq (following-char) '(?# ?\n))
            ;; This entry contains no translations.  Let's ignore it.
            (forward-line 1)
          ;; A new key starts: flush what was collected for the
          ;; previous one.
          (unless (string= key prev-key)
            (when translations
              (tit-flush-translations prev-key translations))
            (setq translations nil
                  prev-key key))
          (if tit-dictionary
              (progn
                (setq start (point))
                (skip-chars-forward "^ \t#\n")
                ;; `concat' treats a nil TRANSLATIONS as empty.
                (setq translations
                      (concat translations
                              (buffer-substring start (point)))))
            (while (not (eolp))
              (setq start (point))
              (skip-chars-forward "^ \t\n")
              (setq translations
                    (cons (buffer-substring start (point)) translations))
              (skip-chars-forward " \t")
              ;; The rest of the line is a comment.
              (when (= (following-char) ?#)
                (end-of-line))))
          (forward-line 1)))))

    (when translations
      (tit-flush-translations prev-key translations))
    (princ ")\n")))
4ed46869 KH |
546 | |
;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Convert the TIT dictionary file FILENAME into a Quail package.
The package is written to the file named by
`tit-make-quail-package-file-name'.  If the optional argument DIRNAME
is specified, it is the directory under which the generated Quail
package is saved."
  (interactive "FTIT dictionary file: ")
  (with-temp-file (tit-make-quail-package-file-name filename dirname)
    (set-buffer-file-coding-system 'iso-2022-7bit)
    ;; Everything the helpers print goes into the output file buffer.
    (let ((standard-output (current-buffer)))
      (with-temp-buffer
        (let ((coding-system-for-read 'no-conversion))
          (insert-file-contents (expand-file-name filename)))
        (set-buffer-multibyte t)

        ;; Decode the buffer contents from the encoding specified by a
        ;; value of the key "ENCODE:", which is looked for only before
        ;; the first line starting with "BEGIN".
        (or (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary doesn't have body part"))
        (let ((limit (point))
              slot)
          (goto-char (point-min))
          (setq tit-encode
                (if (re-search-forward "^ENCODE:[ \t]*" limit t)
                    (progn
                      (goto-char (match-end 0))
                      (tit-read-key-value))
                  tit-default-encode))
          (setq slot (assoc tit-encode tit-encode-list))
          (or slot
              (error "Invalid ENCODE: value in TIT dictionary"))
          (let ((coding (nth 1 slot)))
            (message "Decoding with coding system %s..." coding)
            (goto-char (point-min))
            (decode-coding-region (point-min) (point-max) coding)))

        ;; Set point the starting position of the body part.
        (goto-char (point-min))
        (or (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary can't be decoded correctly"))

        ;; Process the header part, which ends with the BEGIN line.
        (forward-line 1)
        (narrow-to-region (point-min) (point))
        (tit-process-header filename)
        (widen)

        ;; Process the body part.  For speed, we turn off multibyte
        ;; facility in both the output buffer and this buffer.
        (with-current-buffer standard-output
          (set-buffer-multibyte nil))
        (set-buffer-multibyte nil)
        (tit-process-body)))))
4ed46869 KH |
597 | |
;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
Each remaining argument is a TIT dictionary file or a directory; for
a directory, all files in it whose names end in \".tit\" are converted.
A leading \"-dir DIR\" makes the generated packages go under DIR.
A dictionary is converted only if it is newer than its Quail package
file, unless the optional argument FORCE is non-nil.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
generate Quail package file \"XXX.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".
Emacs is killed when this function finishes."
  (defvar command-line-args-left)       ; Avoid compiler warning.
  (unless noninteractive
    (error "`batch-titdic-convert' should be used only with -batch"))
  (cond
   ;; Only show the usage.
   ((string= (car command-line-args-left) "-h")
    (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
    (message "To convert XXX.tit into DIR/xxx.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
   (t
    (let (targetdir)
      ;; Consume "-dir DIR" if it comes first.
      (when (string= (car command-line-args-left) "-dir")
        (setq targetdir (car (cdr command-line-args-left))
              command-line-args-left (cdr (cdr command-line-args-left))))
      (while command-line-args-left
        (let* ((filename (expand-file-name (car command-line-args-left)))
               (files (if (file-directory-p filename)
                          (progn
                            (message "Converting all tit files in the directory %s" filename)
                            (directory-files filename t "\\.tit$"))
                        (list filename))))
          (while files
            (let ((file (expand-file-name (car files))))
              (when (or force
                        (file-newer-than-file-p
                         file
                         (tit-make-quail-package-file-name file targetdir)))
                (message "Converting %s to quail-package..." file)
                (titdic-convert file targetdir)))
            (setq files (cdr files))))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el"))))
  (kill-emacs 0))
640 | ||
641 | ;;; titdic-cnv.el ends here |