Initial revision
[bpt/emacs.git] / lisp / international / titdic-cnv.el
CommitLineData
4ed46869
KH
1;;; titdic-cnv.el --- convert TIT dictionary to Quail package
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: Quail, TIT, cxterm
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to
22;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
;;; Commentary:
25
26;; Convert TIT format dictionary (of cxterm) to quail-package.
27;;
28;; Usage (within Emacs):
29;; M-x titdic-convert<CR>TIT-FILE-NAME<CR>
30;; Usage (from shell):
;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\
32;; [-dir DIR] [DIR | FILE] ...
33;;
34;; When you run titdic-convert within Emacs, you have a chance to
35;; modify arguments of `quail-define-package' before saving the
36;; converted file. For instance, you are likely to modify TITLE,
37;; DOCSTRING, and KEY-BINDINGS.
38
;; A TIT dictionary file (*.tit) is a line-oriented text (English,
;; Chinese, Japanese, and Korean) file.  The whole file consists of
;; two parts, the definition part (`header' hereafter) followed by
;; the dictionary part (`body' hereafter).  All lines beginning with
;; '#' are ignored.
44;;
45;; Each line in the header part has two fields, KEY and VALUE. These
46;; fields are separated by one or more white characters.
47;;
48;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
49;; These fields are separated by one or more white characters.
50;;
51;; See the manual page of `tit2cit' of cxterm distribution for more
52;; detail.
53
54;;; Code:
55
56(require 'quail)
57
(defvar tit-encode-list
  (list (list "GB"   'coding-system-euc-china "Chinese-GB")
        (list "BIG5" 'coding-system-big5      "Chinese-BIG5")
        (list "JIS"  'coding-system-euc-japan "Japanese")
        (list "KS"   'coding-system-euc-korea "Korean"))
  "Table describing the values accepted for the TIT key \"ENCODE:\".
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE), where
ENCODE is the string found in the dictionary header, CODING-SYSTEM
is the symbol of the Emacs coding system used to decode the
dictionary, and LANGUAGE is the language environment name written
into the generated `quail-define-package' call.")
65
;; Read the value field that starts at point on the current line.
(defsubst tit-read-key-value ()
  "Return the value found at point, or nil if there is none.
The value is the run of characters up to the next space, tab or
newline.  It is wrapped in double quotes and passed through the Lisp
reader, so backslash escapes such as \\010 are converted to the
characters they denote."
  (and (looking-at "[^ \t\n]+")
       (let ((raw (match-string 0)))
         (car (read-from-string (format "\"%s\"" raw))))))
70
;; Return an appropriate quail-package filename from FILENAME (TIT
;; dictionary filename).  For instance, ".../ZOZY.tit" -> "zozy.el".
(defun tit-make-quail-package-name (filename &optional dirname)
  "Return the Quail package file name that corresponds to FILENAME.
FILENAME is the name of a TIT dictionary.  Its directory part and its
extension (whatever that extension is, if any) are dropped, the
remainder is downcased and \".el\" is appended.  The result is expanded
relative to DIRNAME, or to `default-directory' when DIRNAME is nil.

The extension is removed with `file-name-sans-extension' rather than
by chopping a fixed four characters, so that names that do not end in
a three-letter extension (\"PY\", \"foo.dict\") are handled as well."
  (let ((base (file-name-sans-extension (file-name-nondirectory filename))))
    (expand-file-name (concat (downcase base) ".el") dirname)))
77
(defvar tit-phrase nil
  "Non-nil while the dictionary being processed is a phrase dictionary.
Set by `tit-process-header': t for BEGINPHRASE, nil for BEGINDICTIONARY.")

(defvar tit-encode nil
  "Value of the key \"ENCODE:\" of the dictionary being converted.
Set by `titdic-convert'; falls back to `tit-default-encode' when the
header has no such key.")

(defvar tit-default-encode "GB"
  "Encoding name assumed for a TIT dictionary without an \"ENCODE:\" key.")
82
;; Generate elements of KEY-BINDINGS arg for `quail-define-package' so
;; that each character in KEYS invokes FUNCTION-SYMBOL.
(defun tit-generate-key-bindings (keys function-symbol)
  "Insert at point one binding line per character of the string KEYS.
Each line has the form (\"KEY\" . FUNCTION-SYMBOL) and is indented to
column 3.

- A control character is written as \"\\C-X\", and only when it is
  currently bound to `quail-execute-non-quail-command' in
  `quail-translation-keymap'; otherwise nothing but the indentation
  is inserted for it.
- A double quote or a backslash is written with a preceding backslash
  so that the generated string literal stays readable by the Lisp
  reader.
- Any other character below 127 is written literally.
- Everything else is written as \"\\C-?\"."
  (let ((len (length keys))
        (i 0)
        key)
    (while (< i len)
      (setq key (aref keys i))
      ;; The indentation is inserted unconditionally; callers rely on
      ;; the inserted text to back up one character afterwards.
      (indent-to 3)
      (cond ((< key ?\ )
             (if (eq (lookup-key quail-translation-keymap (char-to-string key))
                     'quail-execute-non-quail-command)
                 (insert (format "(\"\\C-%c\" . %s)\n"
                                 (+ key ?@) function-symbol))))
            ((memq key '(?\" ?\\))
             ;; These two would otherwise terminate or corrupt the
             ;; string literal being generated.
             (insert (format "(\"\\%c\" . %s)\n" key function-symbol)))
            ((< key 127)
             (insert (format "(\"%c\" . %s)\n" key function-symbol)))
            (t
             (insert (format "(\"\\C-?\" . %s)\n" function-symbol))))
      (setq i (1+ i)))))
101
;; Analyze the header part of a TIT dictionary and generate an
;; appropriate `quail-define-package' function call.
(defun tit-process-header (filename)
  "Turn the TIT header in the accessible buffer portion into a Quail header.
Every line from `point-min' to the end of the accessible portion is
commented out with \";; \" while the values of the keys COMMENT,
MULTICHOICE, MOVERIGHT, MOVELEFT, PROMPT, BACKSPACE, DELETEALL and
KEYPROMPT are collected; BEGINDICTIONARY and BEGINPHRASE set
`tit-phrase'.  A file preamble is then inserted before the commented
header and a `quail-define-package' call after it.

FILENAME is the name of the original TIT dictionary; only its
non-directory part is recorded in the preamble.  The package name is
derived from `buffer-file-name' and the language name from
`tit-encode'.

Return the value of `point-max' after the text has been generated."
  (message "Processing header part...")
  (goto-char (point-min))

  (let (;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))
    ;; At first, collect information from the header.
    (while (not (eobp))
      (insert ";; ")
      (let ((ch (following-char)))
        (cond ((= ch ?C)                ; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((pos (match-end 0)))
                        ;; Escape `"' and `\' in the rest of the line,
                        ;; walking backward so POS stays valid; the
                        ;; text is later put inside a docstring.
                        (end-of-line)
                        (while (re-search-backward "[\"\\]" pos t)
                          (insert "\\")
                          (forward-char -1))
                        (end-of-line)
                        (setq tit-comments (cons (buffer-substring pos (point))
                                                 tit-comments))))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      ;; An empty value must not leave nil here: the
                      ;; prompt is used as a string below.
                      (setq tit-prompt (or (tit-read-key-value) "")))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-phrase nil))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-phrase t))))
              ((= ch ?D)                ; DELETEALL
               ;; Parsed like BACKSPACE/MOVERIGHT/MOVELEFT; without
               ;; this clause `tit-deleteall' could never differ from
               ;; its default.
               (cond ((looking-at "DELETEALL:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-deleteall (tit-read-key-value)))))
              ((= ch ?K)                ; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; A key given purely as an octal escape (for
                        ;; instance \040) is converted to the character
                        ;; it denotes; otherwise `string-to-char' below
                        ;; would record the backslash itself.
                        (if (string-match "\\`\\\\[0-7]+\\'" key-char)
                            (setq key-char
                                  (car (read-from-string
                                        (concat "\"" key-char "\"")))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt))))))))
      (forward-line 1))

    ;; Then, generate header part of the Quail package.
    (goto-char (point-min))
    (insert ";; Quail package `"
            ;; Visited file name without its ".el".
            (substring (file-name-nondirectory buffer-file-name) 0 -3)
            "' generated by the command `titdic-convert'\n"
            ";;\tDate: " (current-time-string) "\n"
            ";;\tOriginal TIT dictionary file: "
            (file-name-nondirectory filename)
            "\n\n"
            ";;; Comment:\n\n"
            ";; Do byte-compile this file again after any modification.\n\n"
            ";;; Start of the header of original TIT dictionary.\n\n")

    (goto-char (point-max))
    (insert "\n"
            ";;; End of the header of original TIT dictionary.\n\n"
            ";;; Code:\n\n"
            "(require 'quail)\n\n")

    (insert "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    ;; NOTE(review): the bracket expressions in the regexp below appear
    ;; to hold ISO-2022 encoded punctuation characters; they are kept
    ;; exactly as found -- confirm against the original file encoding.
    (insert
     "\""
     (concat "quail-"
             (substring (file-name-nondirectory buffer-file-name) 0 -3))
     "\" \"" (nth 2 (assoc tit-encode tit-encode-list))
     "\" \""
     (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
         (substring tit-prompt (match-beginning 1) (match-end 1))
       tit-prompt)
     "\"\n")

    ;; Arg GUIDANCE
    (if tit-keyprompt
        (progn
          (insert " '(")
          ;; Walk a separate pointer: `tit-keyprompt' itself is tested
          ;; again below for the KBD-TRANSLATE and SHOW-LAYOUT args and
          ;; must not be consumed here.
          (let ((prompts tit-keyprompt))
            (while prompts
              (indent-to 3)
              (insert (format "(%d . \"%s\")\n"
                              (string-to-char (car (car prompts)))
                              (cdr (car prompts))))
              (setq prompts (cdr prompts))))
          ;; Put the closing paren before the final newline.
          (forward-char -1)
          (insert ")")
          (forward-char 1))
      (insert " t\n"))

    ;; Arg DOCSTRING
    (insert "\"" tit-prompt "\n")
    (let ((l (nreverse tit-comments)))
      (while l
        (insert (format "%s\n" (car l)))
        (setq l (cdr l))))
    (insert "\"\n")

    ;; Arg KEY-BINDINGS
    (insert " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    ;; Put the closing paren before the final newline.
    (forward-char -1)
    (insert ")")
    (forward-char 1)

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (insert " nil"
            (if tit-multichoice " nil" " t")
            (if tit-keyprompt " t t)\n\n" " nil nil)\n\n")))

  ;; Return the position of end of the header.
  (point-max))
236
;; Convert body part of TIT dictionary into `quail-define-rules'
;; function call.
(defun tit-process-body ()
  "Rewrite the TIT dictionary body from point to the end of the buffer.
A `quail-define-rules' form is opened at point.  Lines starting with
`#' and empty lines are commented out.  Every other line, of the form
KEYSEQ TRANSLATIONS, becomes one rule; directly following lines with
the same KEYSEQ are merged into it.  When `tit-phrase' is non-nil the
translations are written as a vector of strings, otherwise as a single
string.

Key sequences are compared case-sensitively, so that adjacent entries
whose keys differ only in letter case are kept as separate rules."
  (message "Formatting translation rules...")
  ;; NOTE(review): current Emacs documents `enable-multibyte-characters'
  ;; as not directly settable; this binding is kept as found -- confirm
  ;; it is accepted by the Emacs version this file targets.
  (let ((enable-multibyte-characters nil)
        ;; `looking-at' honors `case-fold-search'; with the default
        ;; value a line keyed "A" would be merged into a preceding
        ;; line keyed "a".
        (case-fold-search nil)
        (keyseq "\000")
        pos)
    (insert "(quail-define-rules\n")
    (while (null (eobp))
      (if (or (= (following-char) ?#) (= (following-char) ?\n))
          (insert ";; ")
        (insert "(\"")
        (setq pos (point))
        (skip-chars-forward "^ \t")
        ;; Regexp matching a following line that has the same key.
        (setq keyseq
              (concat (regexp-quote (buffer-substring pos (point))) "[ \t]+"))
        ;; Escape `\' and `"' inside the key sequence.
        (save-excursion
          (while (re-search-backward "[\\\"]" pos t)
            (insert "\\")
            (forward-char -1)))
        (insert "\"")
        (skip-chars-forward " \t")

        ;; Now point is at the start of translations.  Remember it in
        ;; POS and combine lines of the same key sequence while
        ;; deleting trailing white spaces and comments (start with
        ;; '#').  POS doesn't have to be a marker because we never
        ;; modify region before POS.
        (setq pos (point))
        (if (looking-at "[^ \t]*\\([ \t]*#.*\\)")
            (delete-region (match-beginning 1) (match-end 1)))
        (while (and (= (forward-line 1) 0)
                    (looking-at keyseq))
          (let ((p (match-end 0)))
            ;; Join this line to the previous one, dropping its key.
            (skip-chars-backward " \t\n")
            (delete-region (point) p)
            (if tit-phrase (insert " "))
            (if (looking-at "[^ \t]*\\([ \t]*#.*\\)")
                (delete-region (match-beginning 1) (match-end 1)))))

        ;; Modify the current line to meet the syntax of Quail package.
        (goto-char pos)
        (if tit-phrase
            (progn
              ;; PHRASE1 PHRASE2 ... => ["PHRASE1" "PHRASE2" ...]
              (insert "[\"")
              (skip-chars-forward "^ \t\n")
              (while (not (eolp))
                (insert "\"")
                (forward-char 1)
                (insert "\"")
                (skip-chars-forward "^ \t\n"))
              (insert "\"])"))
          ;; TRANSLATIONS => "TRANSLATIONS"
          (insert "\"")
          (end-of-line)
          (insert "\")")))
      (forward-line 1))
    (insert ")\n")))
297
;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Convert the TIT dictionary file FILENAME into a Quail package.
The conversion takes place in the buffer \"*tit-work*\", which is made
to visit the package file named by `tit-make-quail-package-name'.
If the optional argument DIRNAME is specified, it is the directory
under which the generated Quail package is saved.

In batch mode the package is saved right away; otherwise the buffer is
displayed so that it can be edited before saving."
  (interactive "FTIT dictionary file: ")
  (let ((work-buffer (get-buffer-create "*tit-work*")))
    (with-current-buffer work-buffer
      ;; Load the raw dictionary and attach the target file name.
      (erase-buffer)
      (let ((coding-system-for-read 'no-conversion))
        (insert-file-contents (expand-file-name filename)))
      (set-visited-file-name (tit-make-quail-package-name filename dirname) t)
      (set-buffer-file-coding-system 'coding-system-iso-2022-7)

      ;; Find the coding system named by the key "ENCODE:" (looked for
      ;; only before the first "BEGIN" line) and decode the buffer.
      (let ((coding-system
             (save-excursion
               (unless (search-forward "\nBEGIN" nil t)
                 (error "TIT dictionary doesn't have body part"))
               (let ((header-end (point)))
                 (goto-char 1)
                 (setq tit-encode
                       (if (re-search-forward "^ENCODE:[ \t]*" header-end t)
                           (progn
                             (goto-char (match-end 0))
                             (tit-read-key-value))
                         tit-default-encode))
                 (let ((entry (assoc tit-encode tit-encode-list)))
                   (unless entry
                     (error "Invalid ENCODE: value in TIT dictionary"))
                   (nth 1 entry))))))
        (message "Decoding %s..." coding-system)
        (goto-char 1)
        (decode-coding-region 1 (point-max) coding-system))

      ;; Move to the first line of the body part.
      (goto-char 1)
      (unless (search-forward "\nBEGIN" nil t)
        (error "TIT dictionary can't be decoded correctly"))
      (forward-line 1)

      ;; Process the header with the buffer narrowed to it, continue
      ;; from the position it returns, then process the body.
      (let ((body-start
             (save-excursion
               (save-restriction
                 (narrow-to-region 1 (point))
                 (tit-process-header filename)))))
        (goto-char body-start))
      (tit-process-body))

    (if noninteractive
        ;; Save the Quail package file.
        (with-current-buffer work-buffer
          (save-buffer 0))
      ;; Show the Quail package just generated.
      (switch-to-buffer work-buffer)
      (goto-char 1)
      (message "Save this buffer after you make any modification"))))
359
;;;###autoload
(defun batch-titdic-convert ()
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
Each remaining argument is a TIT dictionary file or a directory whose
\"*.tit\" files are all converted; a leading \"-dir DIR\" selects the
directory in which the packages are written.  A dictionary is converted
only when it is newer than its package file.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
generate Quail package file \"xxx.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (unless noninteractive
    (error "`batch-titdic-convert' should be used only with -batch"))
  (cond
   ((string= (car command-line-args-left) "-h")
    ;; Only print the usage.
    (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
    (message "To convert XXX.tit into DIR/xxx.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
   (t
    (let (targetdir)
      ;; An optional leading "-dir DIR" names the output directory.
      (when (string= (car command-line-args-left) "-dir")
        (setq targetdir (car (cdr command-line-args-left))
              command-line-args-left (cdr (cdr command-line-args-left))))
      (while command-line-args-left
        (let* ((arg (expand-file-name (car command-line-args-left)))
               (candidates
                (if (file-directory-p arg)
                    (progn
                      (message "Converting all tit files in the directory %s"
                               arg)
                      (directory-files arg t "\\.tit$"))
                  (list arg))))
          (dolist (candidate candidates)
            (let ((file (expand-file-name candidate)))
              ;; Skip dictionaries whose package is already up to date.
              (when (file-newer-than-file-p
                     file (tit-make-quail-package-name file targetdir))
                (message "Converting %s to quail-package..." file)
                (titdic-convert file targetdir)))))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Do byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el"))))
  (kill-emacs 0))
402
403;;; titdic-cnv.el ends here