Commit | Line | Data |
---|---|---|
49ed466f | 1 | ;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package |
4ed46869 | 2 | |
4ed46869 | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
fa526c4a | 4 | ;; Licensed to the Free Software Foundation. |
4ed46869 KH |
5 | |
6 | ;; Keywords: Quail, TIT, cxterm | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
24 | |
25 | ;;; Commentary: | |
26 | ||
49ed466f | 27 | ;; Convert cxterm dictionary (of TIT format) to quail-package. |
4ed46869 KH |
28 | ;; |
29 | ;; Usage (within Emacs): | |
49ed466f | 30 | ;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR> |
4ed46869 | 31 | ;; Usage (from shell): |
49ed466f | 32 | ;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\ |
4ed46869 KH |
33 | ;; [-dir DIR] [DIR | FILE] ... |
34 | ;; | |
35 | ;; When you run titdic-convert within Emacs, you have a chance to | |
36 | ;; modify arguments of `quail-define-package' before saving the | |
37 | ;; converted file. For instance, you are likely to modify TITLE, | |
38 | ;; DOCSTRING, and KEY-BINDINGS. | |
39 | ||
49ed466f | 40 | ;; Cxterm dictionary file (*.tit) is a line-oriented text (English, |
4ed46869 KH |
41 | ;; Chinese, Japanese, and Korean) file. The whole file consists of |
42 | ;; two parts, the definition part (`header' hereafter) followed by | |
43 | ;; the dictionary part (`body' hereafter). All lines beginning with | |
44 | ;; a leading '#' are ignored. | |
45 | ;; | |
46 | ;; Each line in the header part has two fields, KEY and VALUE. These | |
47 | ;; fields are separated by one or more white characters. | |
48 | ;; | |
49 | ;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS. | |
50 | ;; These fields are separated by one or more white characters. | |
51 | ;; | |
52 | ;; See the manual page of `tit2cit' of cxterm distribution for more | |
53 | ;; detail. | |
54 | ||
55 | ;;; Code: | |
56 | ||
57 | (require 'quail) | |
58 | ||
(defvar tit-encode-list
  (list (list "GB" 'euc-china "Chinese-GB")
	(list "BIG5" 'cn-big5 "Chinese-BIG5")
	(list "JIS" 'euc-japan "Japanese")
	(list "KS" 'euc-kr "Korean"))
  "Alist of the encodings a TIT dictionary may declare.
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE), where
ENCODE is a value of the dictionary key \"ENCODE:\", CODING-SYSTEM is
the Emacs coding system used to decode the dictionary, and LANGUAGE is
the language environment name written into the generated call of
`quail-define-package'.")
66 | ||
67 | ;; Alist of (PACKAGE-NAME . TITLE); `tit-process-header' looks a package name up here (with `assoc') to get the TITLE it prints. | |
68 | (defvar quail-cxterm-package-title-alist | |
69 | '(("chinese-4corner" . "\e$(0(?-F\e(B") | |
70 | ("chinese-array30" . "\e$(0#R#O\e(B") | |
71 | ("chinese-ccdospy" . "\e$AKuF4\e(B") | |
72 | ("chinese-ctlau" . "\e$AAuTA\e(B") | |
73 | ("chinese-ctlaub" . "\e$(0N,Gn\e(B") | |
74 | ("chinese-ecdict" . "\e$(05CKH\e(B") | |
75 | ("chinese-etzy" . "\e$(06/0D\e(B") | |
76 | ("chinese-punct-b5" . "\e$(0O:\e(BB") | |
77 | ("chinese-punct" . "\e$A1j\e(BG") | |
78 | ("chinese-py-b5" . "\e$(03<\e(BB") | |
79 | ("chinese-py" . "\e$AF4\e(BG") | |
80 | ("chinese-qj-b5" . "\e$(0)A\e(BB") | |
81 | ("chinese-qj" . "\e$AH+\e(BG") | |
82 | ("chinese-sw" . "\e$AJWN2\e(B") | |
83 | ("chinese-tonepy" . "\e$A5wF4\e(B") | |
84 | ("chinese-ziranma" . "\e$AK+F4\e(B") | |
85 | ("chinese-zozy" . "\e$(0I\0D\e(B"))) | |
4ed46869 KH |
86 | |
;; Return the value of the key in the current line: the run of
;; non-blank characters starting at point, read as the contents of a
;; Lisp string literal so that backslash escapes (e.g. "\010") are
;; interpreted.  Return nil if point is not at a non-blank character.
(defsubst tit-read-key-value ()
  (when (looking-at "[^ \t\n]+")
    (let ((token (match-string 0)))
      (car (read-from-string (format "\"%s\"" token))))))
91 | ||
;; Return an appropriate quail-package filename from FILENAME (TIT
;; dictionary filename).  For instance, ".../ZOZY.tit" -> "ZOZY.el".
;; The directory part of FILENAME is dropped and its extension,
;; whatever its length, is replaced by ".el"; a FILENAME without any
;; extension just gets ".el" appended.  The result is expanded
;; relative to DIRNAME (`expand-file-name' uses `default-directory'
;; when DIRNAME is nil).
(defun tit-make-quail-package-file-name (filename &optional dirname)
  (expand-file-name
   ;; `file-name-sans-extension' rather than chopping a fixed four
   ;; characters, so names not ending in ".tit" are handled sanely
   ;; and names shorter than four characters do not signal an error.
   (concat (file-name-sans-extension (file-name-nondirectory filename))
	   ".el")
   dirname))
98 | ||
1375754c KH |
;; State shared by the conversion functions.  These are `defvar's and
;; not `defconst's because they are assigned with `setq' while a
;; dictionary is being converted.
(defvar tit-dictionary t
  "Nil if we are processing a phrase dictionary, non-nil otherwise.
Set by `tit-process-header' when it sees the BEGINDICTIONARY or
BEGINPHRASE line of the TIT header.")
(defvar tit-encode nil
  "Value of the key \"ENCODE:\" of the TIT dictionary being converted.
Set by `titdic-convert' and looked up in `tit-encode-list'.")
(defvar tit-default-encode "GB"
  "Value used for `tit-encode' when a dictionary has no \"ENCODE:\" line.")
103 | ||
;; Generate elements of KEY-BINDINGS arg for `quail-define-package' so
;; that each character in KEYS invokes FUNCTION-SYMBOL.
;;
;; KEYS is a string.  For each of its characters that is accepted, a
;; cons (KEY-STRING . FUNCTION-SYMBOL) is printed to `standard-output'
;; with `princ'; entries are separated by a newline and a space.
;; A control character (below ?\ ) is accepted only if
;; `quail-translation-keymap' binds it to
;; `quail-execute-non-quail-command'; any other character is accepted
;; only if it is not above 127.
(defun tit-generate-key-bindings (keys function-symbol)
  (let ((len (length keys))
	(i 0)
	(first t)
	key)
    (while (< i len)
      (setq key (aref keys i))
      (if (if (< key ?\ )
	      (eq (lookup-key quail-translation-keymap
			      (char-to-string key))
		  'quail-execute-non-quail-command)
	    (<= key 127))
	  (progn
	    ;; Print the separator only in front of an entry that is
	    ;; really emitted, so skipped keys leave no blank lines.
	    (or first (princ "\n "))
	    (princ (cons (cond ((< key ?\ ) (format "\"\\C-%c\"" (+ key ?@)))
			       ;; `prin1-to-string' escapes `"' and `\',
			       ;; keeping the generated file readable.
			       ((< key 127)
				(prin1-to-string (char-to-string key)))
			       (t "\"\\C-?\""))
			 function-symbol))
	    (setq first nil)))
      (setq i (1+ i)))))
126 | ||
;; Analyze header part of TIT dictionary and generate an appropriate
;; `quail-define-package' function call.
;;
;; The accessible portion of the current buffer is scanned line by
;; line from its beginning.  Every line is echoed to `standard-output'
;; as a ";; " comment, and the keys COMMENT, MULTICHOICE, MOVERIGHT,
;; MOVELEFT, PROMPT, BACKSPACE and KEYPROMPT are collected.  A
;; BEGINDICTIONARY or BEGINPHRASE line sets `tit-dictionary'.
;; FILENAME is the name of the dictionary file; the package name is
;; "chinese-" followed by its downcased base name.  All output goes to
;; `standard-output'.
(defun tit-process-header (filename)
  (message "Processing header part...")
  (goto-char (point-min))

  ;; At first, generate header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
		  "chinese-"
		  ;; Strip the extension whatever its length instead of
		  ;; assuming a four character ".tit" suffix.
		  (file-name-sans-extension
		   (downcase (file-name-nondirectory filename)))))
	;; TIT keywords and the corresponding default values.
	(tit-multichoice t)
	(tit-prompt "")
	(tit-comments nil)
	(tit-backspace "\010\177")
	(tit-deleteall "\015\025")
	(tit-moveright ".>")
	(tit-moveleft ",<")
	(tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package)
    (princ "' generated by the command `titdic-convert'\n;;\tDate: ")
    (princ (current-time-string))
    (princ "\n;;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n;;; Comment:\n\n")
    (princ ";; Byte-compile this file again after any modification.\n\n")
    (princ ";;; Start of the header of original TIT dictionary.\n\n")

    (while (not (eobp))
      (let ((ch (following-char))
	    (pos (point)))
	;; Dispatch on the first character of the line before trying
	;; the (more expensive) regexps.
	(cond ((= ch ?C)		; COMMENT
	       (cond ((looking-at "COMMENT")
		      (let ((start (match-end 0)))
			(end-of-line)
			(setq tit-comments
			      (cons (buffer-substring start (point))
				    tit-comments))))))
	      ((= ch ?M)		; MULTICHOICE, MOVERIGHT, MOVELEFT
	       (cond ((looking-at "MULTICHOICE:[ \t]*")
		      (goto-char (match-end 0))
		      (setq tit-multichoice (looking-at "YES")))
		     ((looking-at "MOVERIGHT:[ \t]*")
		      (goto-char (match-end 0))
		      (setq tit-moveright (tit-read-key-value)))
		     ((looking-at "MOVELEFT:[ \t]*")
		      (goto-char (match-end 0))
		      (setq tit-moveleft (tit-read-key-value)))))
	      ((= ch ?P)		; PROMPT
	       (cond ((looking-at "PROMPT:[ \t]*")
		      (goto-char (match-end 0))
		      (setq tit-prompt (tit-read-key-value)))))
	      ((= ch ?B)		; BACKSPACE, BEGINDICTIONARY,
					; BEGINPHRASE
	       (cond ((looking-at "BACKSPACE:[ \t]*")
		      (goto-char (match-end 0))
		      (setq tit-backspace (tit-read-key-value)))
		     ((looking-at "BEGINDICTIONARY")
		      (setq tit-dictionary t))
		     ((looking-at "BEGINPHRASE")
		      (setq tit-dictionary nil))))
	      ((= ch ?K)		; KEYPROMPT
	       (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
		      (let ((key-char (match-string 1)))
			(goto-char (match-end 0))
			;; The key may be given as a backslash escape
			;; with digits; read it as a Lisp string then.
			(if (string-match "\\\\[0-9]+" key-char)
			    (setq key-char
				  (car (read-from-string (format "\"%s\""
								 key-char)))))
			(setq tit-keyprompt
			      (cons (cons key-char (tit-read-key-value))
				    tit-keyprompt)))))))
	;; Echo the original header line as a comment.
	(end-of-line)
	(princ ";; ")
	(princ (buffer-substring pos (point)))
	(princ "\n")
	(forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (cdr (assoc package quail-cxterm-package-title-alist))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      (princ (or title
		 (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
		     (substring tit-prompt (match-beginning 1) (match-end 1))
		   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    (if tit-keyprompt
	;; Walk a local copy of the list pointer: `tit-keyprompt'
	;; itself must stay intact because it is tested again below
	;; for the KBD-TRANSLATE and SHOW-LAYOUT args.
	(let ((prompts tit-keyprompt))
	  (princ " '(")
	  (while prompts
	    (princ " ")
	    (princ (format "(%d . \"%s\")\n"
			   (string-to-char (car (car prompts)))
			   (cdr (car prompts))))
	    (setq prompts (cdr prompts)))
	  (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (prin1
     (mapconcat 'identity (cons tit-prompt (nreverse tit-comments)) "\n"))
    (terpri)

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
259 | ||
;; Print one rule (KEY TRANSLATIONS) for `quail-define-rules' to
;; `standard-output', followed by a newline.  Every backslash followed
;; by three digits in KEY is first replaced by what the Lisp reader
;; yields for that escape inside a string.  TRANSLATIONS is printed as
;; is when `tit-dictionary' is non-nil; otherwise it is a list, which
;; is reversed and printed as a vector.
(defsubst tit-flush-translations (key translations)
  (let ((escape-re "\\\\[0-9][0-9][0-9]"))
    (if (string-match escape-re key)
	(let ((start 0)
	      (pieces nil))
	  ;; Collect, in reverse order, the literal text before each
	  ;; escape and the decoded escape itself.
	  (while (string-match escape-re key start)
	    (setq pieces
		  (cons (car (read-from-string
			      (format "\"%s\"" (match-string 0 key))))
			(cons (substring key start (match-beginning 0))
			      pieces))
		  start (match-end 0)))
	  ;; Append the text after the last escape and join everything.
	  (setq key (apply 'concat
			   (nreverse (cons (substring key start) pieces)))))))
  (prin1 (list key
	       (if tit-dictionary
		   translations
		 (vconcat (nreverse translations)))))
  (princ "\n"))
4ed46869 KH |
277 | |
;; Convert body part of TIT dictionary into `quail-define-rules'
;; function call.
;;
;; The body is read from point to the end of the current buffer and
;; the generated form is printed to `standard-output'.  Lines that are
;; empty or start with `#' are skipped.  Each other line starts with a
;; key; lines with the same key that follow one another are merged
;; into one rule, which is printed by `tit-flush-translations' when
;; the key changes or the buffer ends.  When `tit-dictionary' is
;; non-nil only the first token after the key is taken and the tokens
;; of merged lines are concatenated into one string; otherwise all
;; tokens up to a `#' or the end of line are collected in a list.
(defun tit-process-body ()
  (message "Formatting translation rules...")
  (let ((prev-key "")
	ch key translations pos)
    (princ "(quail-define-rules\n")
    (while (null (eobp))
      (setq ch (following-char))
      (if (or (eolp) (= ch ?#))
	  (forward-line 1)
	(setq pos (point))
	(skip-chars-forward "^ \t\n")
	(setq key (buffer-substring pos (point)))
	(skip-chars-forward " \t")
	(setq ch (following-char))
	;; `eolp' is also true at the end of the buffer, so a last
	;; line without newline and without translation is ignored
	;; too instead of yielding a rule with an empty translation.
	(if (or (eolp) (= ch ?#))
	    ;; This entry contains no translations.  Let's ignore it.
	    (forward-line 1)
	  (or (string= key prev-key)
	      (progn
		;; A new key starts: emit the rule collected so far.
		(if translations
		    (tit-flush-translations prev-key translations))
		(setq translations nil
		      prev-key key)))
	  (if tit-dictionary
	      (progn
		(setq pos (point))
		(skip-chars-forward "^ \t#\n")
		(setq translations
		      (if translations
			  (concat translations
				  (buffer-substring pos (point)))
			(buffer-substring pos (point)))))
	    (while (not (eolp))
	      (setq pos (point))
	      (skip-chars-forward "^ \t\n")
	      (setq translations (cons (buffer-substring pos (point))
				       translations))
	      (skip-chars-forward " \t")
	      ;; A `#' starts a trailing comment; skip the rest.
	      (if (= (following-char) ?#) (end-of-line))))
	  (forward-line 1))))

    (if translations
	(tit-flush-translations prev-key translations))
    (princ ")\n")))
4ed46869 KH |
327 | |
328 | ;;;###autoload | |
(defun titdic-convert (filename &optional dirname)
  "Convert the TIT dictionary file FILENAME into a Quail package.
The package is written to the file named by
`tit-make-quail-package-file-name' for FILENAME and the optional
argument DIRNAME, the directory under which the package is saved.
The dictionary is decoded according to its \"ENCODE:\" key, or
`tit-default-encode' if it has none; `tit-encode' is set to that value."
  (interactive "FTIT dictionary file: ")
  (with-temp-file (tit-make-quail-package-file-name filename dirname)
    (set-buffer-file-coding-system 'iso-2022-7bit)
    ;; Everything the helpers print goes into the package buffer.
    (let ((standard-output (current-buffer)))
      (with-temp-buffer
	;; Read the raw bytes first; the right coding system is only
	;; known after the header has been looked at.
	(let ((coding-system-for-read 'no-conversion))
	  (insert-file-contents (expand-file-name filename)))
	(set-buffer-multibyte t)

	;; Find the end of the header, then decode the whole buffer
	;; with the coding system selected by the key "ENCODE:".
	(or (search-forward "\nBEGIN" nil t)
	    (error "TIT dictionary doesn't have body part"))
	(let ((header-end (point))
	      entry)
	  (goto-char (point-min))
	  (setq tit-encode
		(if (re-search-forward "^ENCODE:[ \t]*" header-end t)
		    ;; Point is now just after the key, at its value.
		    (tit-read-key-value)
		  tit-default-encode))
	  (setq entry (assoc tit-encode tit-encode-list))
	  (or entry
	      (error "Invalid ENCODE: value in TIT dictionary"))
	  (message "Decoding with coding system %s..." (nth 1 entry))
	  (goto-char (point-min))
	  (decode-coding-region (point-min) (point-max) (nth 1 entry)))

	;; Decoding moved things around; locate the body again.
	(goto-char (point-min))
	(or (search-forward "\nBEGIN" nil t)
	    (error "TIT dictionary can't be decoded correctly"))

	;; The header is everything up to and including the BEGIN line.
	(forward-line 1)
	(narrow-to-region (point-min) (point))
	(tit-process-header filename)
	(widen)

	;; The body follows.  For speed, multibyte handling is turned
	;; off in both the output buffer and this buffer.
	(with-current-buffer standard-output
	  (set-buffer-multibyte nil))
	(set-buffer-multibyte nil)
	(tit-process-body)))))
4ed46869 KH |
378 | |
379 | ;;;###autoload | |
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
generate Quail package file \"XXX.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".

If the first remaining argument is \"-dir\", the argument after it is
the directory in which the generated files are saved.  An argument
that names a directory stands for all files in it whose names end in
\".tit\".  A dictionary is converted only if it is newer than its
package file, unless the optional argument FORCE is non-nil.
Emacs is killed with exit status 0 afterwards."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-titdic-convert' should be used only with -batch"))
  (if (string= (car command-line-args-left) "-h")
      (progn
	;; The package file keeps the case of the dictionary file name
	;; (see `tit-make-quail-package-file-name').
	(message "To convert XXX.tit and YYY.tit into XXX.el and YYY.el:")
	(message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
	(message "To convert XXX.tit into DIR/XXX.el:")
	(message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
    (let (targetdir filename files file)
      (if (string= (car command-line-args-left) "-dir")
	  (progn
	    (setq command-line-args-left (cdr command-line-args-left))
	    (setq targetdir (car command-line-args-left))
	    (setq command-line-args-left (cdr command-line-args-left))))
      (while command-line-args-left
	(setq filename (expand-file-name (car command-line-args-left)))
	(if (file-directory-p filename)
	    (progn
	      (message "Converting all tit files in the directory %s" filename)
	      (setq files (directory-files filename t "\\.tit$")))
	  (setq files (list filename)))
	(while files
	  (setq file (expand-file-name (car files)))
	  ;; Skip dictionaries whose package is already up to date.
	  (when (or force
		    (file-newer-than-file-p
		     file (tit-make-quail-package-file-name file targetdir)))
	    (message "Converting %s to quail-package..." file)
	    (titdic-convert file targetdir))
	  (setq files (cdr files)))
	(setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el")))
  (kill-emacs 0))
421 | ||
422 | ;;; titdic-cnv.el ends here |