Commit | Line | Data |
---|---|---|
49ed466f | 1 | ;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package |
4ed46869 | 2 | |
4ed46869 | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
fa526c4a | 4 | ;; Licensed to the Free Software Foundation. |
4ed46869 KH |
5 | |
6 | ;; Keywords: Quail, TIT, cxterm | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
24 | |
25 | ;;; Commentary: | |
26 | ||
49ed466f | 27 | ;; Convert cxterm dictionary (of TIT format) to quail-package. |
4ed46869 KH |
28 | ;; |
29 | ;; Usage (within Emacs): | |
49ed466f | 30 | ;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR> |
4ed46869 | 31 | ;; Usage (from shell): |
49ed466f | 32 | ;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\ |
4ed46869 KH |
33 | ;; [-dir DIR] [DIR | FILE] ... |
34 | ;; | |
35 | ;; When you run titdic-convert within Emacs, you have a chance to | |
36 | ;; modify arguments of `quail-define-package' before saving the | |
37 | ;; converted file. For instance, you are likely to modify TITLE, | |
38 | ;; DOCSTRING, and KEY-BINDINGS. | |
39 | ||
49ed466f | 40 | ;; Cxterm dictionary file (*.tit) is a line-oriented text (English, |
4ed46869 KH |
41 | ;; Chinese, Japanese, and Korean) file. The whole file consists of |
42 | ;; two parts, the definition part (`header' hereafter) followed by | |
43 | ;; the dictionary part (`body' hereafter). All lines beginning with | |
44 | ;; '#' are ignored. | |
45 | ;; | |
46 | ;; Each line in the header part has two fields, KEY and VALUE. These | |
47 | ;; fields are separated by one or more whitespace characters. | |
48 | ;; | |
49 | ;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS. | |
50 | ;; These fields are separated by one or more whitespace characters. | |
51 | ;; | |
52 | ;; See the manual page of `tit2cit' of cxterm distribution for more | |
53 | ;; detail. | |
54 | ||
55 | ;;; Code: | |
56 | ||
57 | (require 'quail) | |
58 | ||
(defvar tit-encode-list
  '(("GB" euc-china "Chinese-GB")
    ("BIG5" cn-big5 "Chinese-BIG5")
    ("JIS" euc-japan "Japanese")
    ("KS" euc-kr "Korean"))
  "Alist of the values accepted for the TIT header key \"ENCODE:\".
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE), where ENCODE
is the string found in the dictionary header, CODING-SYSTEM is the Emacs
coding system used to decode the dictionary, and LANGUAGE is the name of
the corresponding language environment.")
66 | ||
(defvar quail-cxterm-package-title-alist
  '(("chinese-4corner" . "\e$(0(?-F\e(B")
    ("chinese-array30" . "\e$(0#R#O\e(B")
    ("chinese-ccdospy" . "\e$AKuF4\e(B")
    ("chinese-ctlau" . "\e$AAuTA\e(B")
    ("chinese-ctlaub" . "\e$(0N,Gn\e(B")
    ("chinese-ecdict" . "\e$(05CKH\e(B")
    ("chinese-etzy" . "\e$(06/0D\e(B")
    ("chinese-punct-b5" . "\e$(0O:\e(BB")
    ("chinese-punct" . "\e$A1j\e(BG")
    ("chinese-py-b5" . "\e$(03<\e(BB")
    ("chinese-py" . "\e$AF4\e(BG")
    ("chinese-qj-b5" . "\e$(0)A\e(BB")
    ("chinese-qj" . "\e$AH+\e(BG")
    ("chinese-sw" . "\e$AJWN2\e(B")
    ("chinese-tonepy" . "\e$A5wF4\e(B")
    ("chinese-ziranma" . "\e$AK+F4\e(B")
    ("chinese-zozy" . "\e$(0I\0D\e(B"))
  "Alist mapping Quail package names to their titles.
Each element has the form (PACKAGE-NAME . TITLE).  When the name of the
package being generated is found here, TITLE is used as the TITLE
argument of `quail-define-package' instead of a title derived from the
dictionary's prompt.")
4ed46869 KH |
86 | |
(defsubst tit-read-key-value ()
  "Return the header value that starts at point, as a Lisp string.
The value is the run of characters up to the next space, tab or
newline.  It is read as the contents of a Lisp string literal, so
backslash escapes such as \\010 are converted to the characters they
denote.  Return nil if no such run starts at point."
  (when (looking-at "[^ \t\n]+")
    (let ((raw (match-string 0)))
      (car (read-from-string (format "\"%s\"" raw))))))
91 | ||
(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name corresponding to FILENAME.
FILENAME is the name of a TIT dictionary file.  Its directory part and
its extension, if any, are dropped and \".el\" is appended; for
instance, \".../ZOZY.tit\" yields \"ZOZY.el\".  The result is expanded
relative to DIRNAME, or to the default directory if DIRNAME is nil."
  (expand-file-name
   ;; Strip the real extension rather than a fixed number of characters,
   ;; so names that do not end in \".tit\" are not truncated arbitrarily.
   (concat (file-name-sans-extension (file-name-nondirectory filename)) ".el")
   dirname))
98 | ||
(defvar tit-phrase nil
  "Non-nil if the dictionary being processed is a phrase dictionary.
Header processing sets this to t when it sees a BEGINPHRASE line and to
nil when it sees a BEGINDICTIONARY line.")

(defvar tit-encode nil
  "Value of the \"ENCODE:\" key of the dictionary being converted.
`titdic-convert' sets it, using `tit-default-encode' when the header
has no \"ENCODE:\" line.")

(defvar tit-default-encode "GB"
  "Encoding name assumed for a dictionary without an \"ENCODE:\" line.")
103 | ||
(defun tit-escape-key-char (char)
  "Return CHAR as text that can appear inside a Lisp string literal.
A double quote or a backslash is preceded by a backslash; any other
character is returned unchanged."
  (if (or (= char ?\") (= char ?\\))
      (string ?\\ char)
    (char-to-string char)))

(defun tit-generate-key-bindings (keys function-symbol)
  "Insert KEY-BINDINGS elements binding each character of KEYS.
For every character in the string KEYS, insert at point one line of the
form (\"KEY\" . FUNCTION-SYMBOL), indented to column 3, suitable for the
KEY-BINDINGS argument of `quail-define-package'.

A control character is written as \"\\C-X\", and only if it is bound to
`quail-execute-non-quail-command' in `quail-translation-keymap';
otherwise nothing is written for it.  A character of code 127 or above
is written as \"\\C-?\".  Characters that are special inside a string
literal (double quote and backslash) are escaped so that the generated
text can always be read back."
  (let ((len (length keys))
        (i 0)
        key)
    (while (< i len)
      (setq key (aref keys i))
      (indent-to 3)
      (cond ((< key ?\ )
             ;; Control character: emit it only when Quail would
             ;; otherwise pass the key through to a non-Quail command.
             (if (eq (lookup-key quail-translation-keymap (char-to-string key))
                     'quail-execute-non-quail-command)
                 (insert (format "(\"\\C-%s\" . %s)\n"
                                 (tit-escape-key-char (+ key ?@))
                                 function-symbol))))
            ((< key 127)
             (insert (format "(\"%s\" . %s)\n"
                             (tit-escape-key-char key)
                             function-symbol)))
            (t
             (insert (format "(\"\\C-?\" . %s)\n" function-symbol))))
      (setq i (1+ i)))))
122 | ||
(defun tit-process-header (filename)
  "Turn the header of a TIT dictionary into the head of a Quail package.
Work on the accessible portion of the current buffer, which the caller
is expected to have narrowed to the header part of the dictionary.
Every header line is commented out with \";; \" and the values of the
recognized keywords are collected.  A file preamble is then inserted
before the commented header and a `quail-define-package' call after it.

FILENAME is the name of the original TIT dictionary; it is only
mentioned in the generated preamble.  The package name is derived from
`buffer-file-name', so the buffer must already be visiting the output
file.  As a side effect, `tit-phrase' is set according to the
BEGINPHRASE or BEGINDICTIONARY line.

Return the position of the end of the generated text."
  (message "Processing header part...")
  (goto-char (point-min))

  (let (;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))
    ;; At first, collect information from the header.  Each line is
    ;; commented out, then dispatched on the first character of its
    ;; keyword.
    (while (not (eobp))
      (insert ";; ")
      (let ((ch (following-char)))
        (cond ((= ch ?C)                ; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((pos (match-end 0)))
                        ;; Escape `"' and `\' in place so that the text
                        ;; can later be pasted into the docstring.
                        (end-of-line)
                        (while (re-search-backward "[\"\\]" pos t)
                          (insert "\\")
                          (forward-char -1))
                        (end-of-line)
                        (setq tit-comments (cons (buffer-substring pos (point))
                                                 tit-comments))))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      ;; An empty PROMPT: line yields nil; keep the
                      ;; prompt a string because it is matched and
                      ;; inserted below.
                      (setq tit-prompt (or (tit-read-key-value) "")))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-phrase nil))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-phrase t))))
              ((= ch ?K)                ; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; A key given as a backslash-digits escape is
                        ;; converted to the character it denotes.
                        (if (string-match "\\\\[0-9]+" key-char)
                            (setq key-char
                                  (car (read-from-string (format "\"%s\""
                                                                 key-char)))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt)))))))
      (forward-line 1))

    ;; Then, generate header part of the Quail package.
    (goto-char (point-min))
    (let ((package
           (concat
            "chinese-"
            ;; Drop the trailing ".el" of the visited output file name.
            (substring (downcase (file-name-nondirectory buffer-file-name))
                       0 -3))))
      (insert ";; Quail package `"
              package
              "' generated by the command `titdic-convert'\n"
              ";;\tDate: " (current-time-string) "\n"
              ";;\tOriginal TIT dictionary file: "
              (file-name-nondirectory filename)
              "\n\n"
              ";;; Comment:\n\n"
              ";; Do byte-compile this file again after any modification.\n\n"
              ";;; Start of the header of original TIT dictionary.\n\n")

      (goto-char (point-max))
      (insert "\n"
              ";;; End of the header of original TIT dictionary.\n\n"
              ";;; Code:\n\n"
              "(require 'quail)\n\n")

      (insert "(quail-define-package ")
      ;; Args NAME, LANGUAGE, TITLE
      (let ((title (cdr (assoc package quail-cxterm-package-title-alist))))
        (insert
         "\""
         package
         "\" \"" (nth 2 (assoc tit-encode tit-encode-list))
         "\" \""
         (or title
             (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
                 (substring tit-prompt (match-beginning 1) (match-end 1))
               tit-prompt))
         "\"\n")))

    ;; Arg GUIDANCE
    (if tit-keyprompt
        ;; Walk a separate pointer: `tit-keyprompt' itself is tested
        ;; again below to decide KBD-TRANSLATE and SHOW-LAYOUT.
        (let ((l tit-keyprompt))
          (insert " '(")
          (while l
            (indent-to 3)
            (insert (format "(%d . \"%s\")\n"
                            (string-to-char (car (car l)))
                            (cdr (car l))))
            (setq l (cdr l)))
          ;; Put the closing paren before the final newline.
          (forward-char -1)
          (insert ")")
          (forward-char 1))
      (insert " t\n"))

    ;; Arg DOCSTRING
    (insert "\"" tit-prompt "\n")
    (let ((l (nreverse tit-comments)))
      (while l
        (insert (format "%s\n" (car l)))
        (setq l (cdr l))))
    (insert "\"\n")

    ;; Arg KEY-BINDINGS
    (insert " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (forward-char -1)
    (insert ")")
    (forward-char 1)

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (insert " nil"
            (if tit-multichoice " nil" " t")
            (if tit-keyprompt " t t)\n\n" " nil nil)\n\n")))

  ;; Return the position of end of the header.
  (point-max))
268 | ||
(defun tit-process-body ()
  "Convert the body of a TIT dictionary into a `quail-define-rules' call.
Start at point, which must be at the beginning of the body part, and
rewrite the buffer in place up to its end.

Lines that are empty or start with `#' are commented out with \";; \".
Every other line has the form KEYSEQ TRANSLATIONS; it is rewritten as
\(\"KEYSEQ\" \"TRANSLATIONS\"), or as (\"KEYSEQ\" [\"PHRASE1\" ...]) when
`tit-phrase' is non-nil.  Consecutive lines with the same KEYSEQ are
merged into one rule, trailing `#' comments are removed, and a line
that has a key but no translation is deleted."
  (message "Formatting translation rules...")
  (let ((enable-multibyte-characters nil)
        (keyseq "\000")
        pos)
    (insert "(quail-define-rules\n")
    (while (null (eobp))
      (if (or (= (following-char) ?#) (= (following-char) ?\n))
          (progn
            (insert ";; ")
            (forward-line 1))
        (insert "(\"")
        (setq pos (point))
        ;; The key sequence ends at the first whitespace.  Newline is
        ;; included in the stop set so that a line consisting of a key
        ;; alone does not swallow the following line into the key.
        (skip-chars-forward "^ \t\n")
        (setq keyseq
              (concat (regexp-quote (buffer-substring pos (point))) "[ \t]+"))
        (save-excursion
          ;; Escape `"' and `\' which is not used for quoting the
          ;; following octal digits.
          (while (re-search-backward "\"\\|\\\\[^0-9]" pos t)
            (insert "\\")
            (forward-char -1)))
        (insert "\"")
        (skip-chars-forward " \t")

        ;; Now point is at the start of translations.  Remember it in
        ;; POS and combine lines of the same key sequence while
        ;; deleting trailing white spaces and comments (start with
        ;; '#').  POS doesn't have to be a marker because we never
        ;; modify region before POS.
        (setq pos (point))
        ;; Newline is excluded from the leading class so that only a
        ;; comment on this very line can be deleted.
        (if (looking-at "[^ \t\n]*\\([ \t]*#.*\\)")
            (delete-region (match-beginning 1) (match-end 1)))
        (while (and (= (forward-line 1) 0)
                    (looking-at keyseq))
          (let ((p (match-end 0)))
            ;; Join this line to the previous one, dropping the
            ;; repeated key sequence.
            (skip-chars-backward " \t\n")
            (delete-region (point) p)
            (if tit-phrase (insert " "))
            (if (looking-at "[^ \t\n]*\\([ \t]*#.*\\)")
                (delete-region (match-beginning 1) (match-end 1)))))

        (goto-char pos)
        (if (eolp)
            ;; This entry contains no translations.  Let's ignore it.
            (progn
              (beginning-of-line)
              (setq pos (point))
              (forward-line 1)
              (delete-region pos (point)))

          ;; Modify the current line to meet the syntax of Quail package.
          (if tit-phrase
              (progn
                ;; PHRASE1 PHRASE2 ... => ["PHRASE1" "PHRASE2" ...]
                (insert "[")
                (skip-chars-forward " \t")
                (while (not (eolp))
                  (insert "\"")
                  (skip-chars-forward "^ \t\n")
                  (insert "\"")
                  (skip-chars-forward " \t"))
                (insert "])"))
            ;; TRANSLATIONS => "TRANSLATIONS"
            (insert "\"")
            (end-of-line)
            (skip-chars-backward " \t")
            (insert "\")"))
          (forward-line 1))))
    (insert ")\n")))
342 | ||
;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Convert the TIT dictionary file FILENAME into a Quail package.
The package is built in the buffer \"*tit-work*\", which is made to
visit the output file.  Optional argument DIRNAME, if specified, is the
directory under which the generated Quail package is saved.

When Emacs runs noninteractively the buffer is saved; otherwise the
buffer is displayed so that it can be edited before being saved."
  (interactive "FTIT dictionary file: ")
  (let ((work-buffer (get-buffer-create "*tit-work*")))
    (save-excursion
      ;; Load the raw dictionary into the work buffer and make the
      ;; buffer visit the package file to be generated.
      (set-buffer work-buffer)
      (erase-buffer)
      (let ((coding-system-for-read 'no-conversion))
        (insert-file-contents (expand-file-name filename)))
      (set-visited-file-name
       (tit-make-quail-package-file-name filename dirname) t)
      (set-buffer-file-coding-system 'iso-2022-7bit)

      ;; Decode the buffer contents according to the "ENCODE:" key
      ;; found in the header, i.e. before the first line starting
      ;; with "BEGIN".
      (let (coding-system)
        (save-excursion
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary doesn't have body part"))
          (let ((limit (point)))
            (goto-char 1)
            (setq tit-encode
                  (if (re-search-forward "^ENCODE:[ \t]*" limit t)
                      (progn
                        (goto-char (match-end 0))
                        (tit-read-key-value))
                    tit-default-encode))
            (let ((slot (assoc tit-encode tit-encode-list)))
              (unless slot
                (error "Invalid ENCODE: value in TIT dictionary"))
              (setq coding-system (nth 1 slot)))))
        (message "Decoding %s..." coding-system)
        (goto-char 1)
        (decode-coding-region 1 (point-max) coding-system))

      ;; Move point to the starting position of the body part, i.e. the
      ;; line after the "BEGIN..." line.
      (goto-char 1)
      (unless (search-forward "\nBEGIN" nil t)
        (error "TIT dictionary can't be decoded correctly"))
      (forward-line 1)

      ;; Convert the header with the buffer narrowed to it, then go to
      ;; the end of the generated header and convert the body.
      (goto-char
       (save-excursion
         (save-restriction
           (narrow-to-region 1 (point))
           (tit-process-header filename))))
      (tit-process-body))

    (cond (noninteractive
           ;; Save the Quail package file.
           (save-excursion
             (set-buffer work-buffer)
             (save-buffer 0)))
          (t
           ;; Show the Quail package just generated.
           (switch-to-buffer work-buffer)
           (goto-char 1)
           (message "Save this buffer after you make any modification")))))
405 | ||
;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
generate Quail package file \"xxx.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".

The arguments may start with \"-dir DIR\" to name the output directory.
Each remaining argument is a file, or a directory in which every file
matching \"\\\\.tit$\" is taken.  A file is converted only if FORCE is
non-nil or it is newer than its Quail package file.  Emacs is killed
with exit code 0 afterwards."
  (defvar command-line-args-left)       ; Avoid compiler warning.
  (unless noninteractive
    (error "`batch-titdic-convert' should be used only with -batch"))
  (cond
   ((string= (car command-line-args-left) "-h")
    ;; Only print the usage.
    (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
    (message "To convert XXX.tit into DIR/xxx.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
   (t
    (let (target-dir arg tit-files tit-file)
      ;; An optional leading "-dir DIR" selects the output directory.
      (when (string= (car command-line-args-left) "-dir")
        (setq command-line-args-left (cdr command-line-args-left))
        (setq target-dir (car command-line-args-left))
        (setq command-line-args-left (cdr command-line-args-left)))
      (while command-line-args-left
        (setq arg (expand-file-name (car command-line-args-left)))
        ;; A directory argument stands for all the tit files in it.
        (setq tit-files
              (cond ((file-directory-p arg)
                     (message "Converting all tit files in the directory %s" arg)
                     (directory-files arg t "\\.tit$"))
                    (t
                     (list arg))))
        (while tit-files
          (setq tit-file (expand-file-name (car tit-files)))
          ;; Skip up-to-date packages unless conversion is forced.
          (if (or force
                  (file-newer-than-file-p
                   tit-file
                   (tit-make-quail-package-file-name tit-file target-dir)))
              (progn
                (message "Converting %s to quail-package..." tit-file)
                (titdic-convert tit-file target-dir)))
          (setq tit-files (cdr tit-files)))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Do byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el"))))
  (kill-emacs 0))
448 | ||
449 | ;;; titdic-cnv.el ends here |