(titdic-convert): Set
[bpt/emacs.git] / lisp / international / titdic-cnv.el
CommitLineData
49ed466f 1;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
4ed46869
KH
5
6;; Keywords: Quail, TIT, cxterm
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869
KH
24
;;; Commentary:
26
49ed466f 27;; Convert cxterm dictionary (of TIT format) to quail-package.
4ed46869
KH
28;;
29;; Usage (within Emacs):
49ed466f 30;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR>
4ed46869 31;; Usage (from shell):
49ed466f 32;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\
4ed46869
KH
33;; [-dir DIR] [DIR | FILE] ...
34;;
35;; When you run titdic-convert within Emacs, you have a chance to
36;; modify arguments of `quail-define-package' before saving the
37;; converted file. For instance, you are likely to modify TITLE,
38;; DOCSTRING, and KEY-BINDINGS.
39
;; A cxterm dictionary file (*.tit) is a line-oriented text (English,
;; Chinese, Japanese, and Korean) file.  The whole file consists of
;; two parts, the definition part (`header' hereafter) followed by
;; the dictionary part (`body' hereafter).  All lines beginning with
;; `#' are ignored.
;;
;; Each line in the header part has two fields, KEY and VALUE.  These
;; fields are separated by one or more whitespace characters.
;;
;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
;; These fields are separated by one or more whitespace characters.
;;
;; See the manual page of `tit2cit' in the cxterm distribution for more
;; details.
54
55;;; Code:
56
57(require 'quail)
58
(defvar tit-encode-list
  '(("GB"   euc-china "Chinese-GB")
    ("BIG5" cn-big5   "Chinese-BIG5")
    ("JIS"  euc-japan "Japanese")
    ("KS"   euc-kr    "Korean"))
  "Table of the values accepted for the TIT header key \"ENCODE:\".
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE), where ENCODE
is the string found in the dictionary, CODING-SYSTEM is the Emacs coding
system used to decode the dictionary, and LANGUAGE is the name of the
corresponding language environment.")
66
(defvar quail-cxterm-package-title-alist
  '(("chinese-4corner" . "\e$(0(?-F\e(B")
    ("chinese-array30" . "\e$(0#R#O\e(B")
    ("chinese-ccdospy" . "\e$AKuF4\e(B")
    ("chinese-ctlau" . "\e$AAuTA\e(B")
    ("chinese-ctlaub" . "\e$(0N,Gn\e(B")
    ("chinese-ecdict" . "\e$(05CKH\e(B")
    ("chinese-etzy" . "\e$(06/0D\e(B")
    ("chinese-punct-b5" . "\e$(0O:\e(BB")
    ("chinese-punct" . "\e$A1j\e(BG")
    ("chinese-py-b5" . "\e$(03<\e(BB")
    ("chinese-py" . "\e$AF4\e(BG")
    ("chinese-qj-b5" . "\e$(0)A\e(BB")
    ("chinese-qj" . "\e$AH+\e(BG")
    ("chinese-sw" . "\e$AJWN2\e(B")
    ("chinese-tonepy" . "\e$A5wF4\e(B")
    ("chinese-ziranma" . "\e$AK+F4\e(B")
    ("chinese-zozy" . "\e$(0I\0D\e(B"))
  "Alist of Quail package names and the corresponding titles.
Each element has the form (PACKAGE-NAME . TITLE).  `tit-process-header'
looks the generated package name up here and uses TITLE, when found, in
preference to a title derived from the dictionary's PROMPT value.")
4ed46869
KH
86
(defsubst tit-read-key-value ()
  "Return the value that starts at point on the current line, or nil.
The value is the run of characters at point containing no space, tab or
newline.  It is wrapped in double quotes and handed to the Lisp reader,
so backslash escapes written in the dictionary (such as \\010) come back
as the characters they denote.  Return nil if no such run starts at point."
  (when (looking-at "[^ \t\n]+")
    (let ((quoted (concat "\"" (match-string 0) "\"")))
      (car (read-from-string quoted)))))
91
(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name for the TIT dictionary FILENAME.
The directory part and the final extension of FILENAME are dropped and
\".el\" is appended; for instance, \".../ZOZY.tit\" gives \"ZOZY.el\".
The result is expanded relative to DIRNAME, or relative to
`default-directory' when DIRNAME is nil or omitted."
  ;; Use `file-name-sans-extension' instead of chopping a fixed four
  ;; characters: the fixed chop signalled `args-out-of-range' for names
  ;; shorter than four characters and produced wrong names for files
  ;; whose extension is not exactly \".tit\".
  (expand-file-name
   (concat (file-name-sans-extension (file-name-nondirectory filename))
           ".el")
   dirname))
98
(defvar tit-phrase nil
  "Non-nil if the dictionary being processed is a phrase dictionary.
Set by `tit-process-header' (t for BEGINPHRASE, nil for BEGINDICTIONARY)
and consulted by `tit-process-body'.")

(defvar tit-encode nil
  "Value of the \"ENCODE:\" key of the dictionary being processed.
Set by `titdic-convert'; falls back to `tit-default-encode' when the
dictionary has no \"ENCODE:\" line.")

(defvar tit-default-encode "GB"
  "Value assumed for `tit-encode' when a dictionary has no \"ENCODE:\" line.")
103
(defun tit-generate-key-bindings (keys function-symbol)
  "Insert KEY-BINDINGS elements binding each character of KEYS.
KEYS is a string; for each of its characters an element of the form
\(\"KEY\" . FUNCTION-SYMBOL) followed by a newline is inserted at point,
indented to column 3, for use in the KEY-BINDINGS argument of
`quail-define-package'.

A control character is written as \"\\C-X\", and only when
`quail-translation-keymap' binds it to `quail-execute-non-quail-command';
other control characters are skipped.  A character of code 127 or above
is written as \"\\C-?\"."
  (let ((len (length keys))
        (i 0)
        key key-literal)
    (while (< i len)
      (setq key (aref keys i)
            key-literal
            (cond ((< key ?\ )
                   (if (eq (lookup-key quail-translation-keymap
                                       (char-to-string key))
                           'quail-execute-non-quail-command)
                       (let ((base (+ key ?@)))
                         ;; Control-backslash must be written "\C-\\",
                         ;; otherwise the backslash would quote the
                         ;; closing double quote of the literal.
                         (format "\"\\C-%s\""
                                 (if (= base ?\\)
                                     "\\\\"
                                   (char-to-string base))))))
                  ((< key 127)
                   ;; `prin1-to-string' escapes `\"' and `\\', so such
                   ;; keys still produce a readable string literal.
                   (prin1-to-string (char-to-string key)))
                  (t "\"\\C-?\"")))
      ;; Indent only when an element is really inserted, so that a
      ;; skipped key leaves no stray whitespace behind.
      (when key-literal
        (indent-to 3)
        (insert (format "(%s . %s)\n" key-literal function-symbol)))
      (setq i (1+ i)))))
122
(defun tit-process-header (filename)
  "Turn the TIT header in the current buffer into a Quail package header.
The accessible portion of the buffer should be the header part of the
dictionary.  Every header line is commented out with \";; \" while the
values of the keys COMMENT, MULTICHOICE, MOVERIGHT, MOVELEFT, PROMPT,
BACKSPACE and KEYPROMPT are collected; BEGINDICTIONARY and BEGINPHRASE
set `tit-phrase'.  A file preamble is then inserted before the commented
header, and a `quail-define-package' call after it.

FILENAME is the name of the original TIT dictionary; only its
non-directory part is used, in the generated preamble.  The package name
is derived from `buffer-file-name', and the language name from
`tit-encode' via `tit-encode-list'.

Return the position of the end of the generated header."
  (message "Processing header part...")
  (goto-char (point-min))

  (let (;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        ;; NOTE(review): no clause below parses a DELETEALL key, so this
        ;; default is always the value used.
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))
    ;; At first, collect information from the header.
    (while (not (eobp))
      (insert ";; ")
      (let ((ch (following-char)))
        (cond ((= ch ?C)                ; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((pos (match-end 0)))
                        ;; Escape `"' and `\' in the comment text, as it
                        ;; is later put inside the generated docstring.
                        (end-of-line)
                        (while (re-search-backward "[\"\\]" pos t)
                          (insert "\\")
                          (forward-char -1))
                        (end-of-line)
                        (setq tit-comments (cons (buffer-substring pos (point))
                                                 tit-comments))))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-prompt (tit-read-key-value)))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-phrase nil))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-phrase t))))
              ((= ch ?K)                ; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; A key written as a backslash followed by
                        ;; digits is converted by the Lisp reader.
                        (if (string-match "\\\\[0-9]+" key-char)
                            (setq key-char
                                  (car (read-from-string (format "\"%s\""
                                                                 key-char)))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt))))))))
      (forward-line 1))

    ;; Then, generate header part of the Quail package.
    (goto-char (point-min))
    (let ((package
           (concat
            "chinese-"
            (substring (downcase (file-name-nondirectory buffer-file-name))
                       0 -3))))
      (insert ";; Quail package `"
              package
              "' generated by the command `titdic-convert'\n"
              ";;\tDate: " (current-time-string) "\n"
              ";;\tOriginal TIT dictionary file: "
              (file-name-nondirectory filename)
              "\n\n"
              ";;; Comment:\n\n"
              ";; Do byte-compile this file again after any modification.\n\n"
              ";;; Start of the header of original TIT dictionary.\n\n")

      (goto-char (point-max))
      (insert "\n"
              ";;; End of the header of original TIT dictionary.\n\n"
              ";;; Code:\n\n"
              "(require 'quail)\n\n")

      (insert "(quail-define-package ")
      ;; Args NAME, LANGUAGE, TITLE
      (let ((title (cdr (assoc package quail-cxterm-package-title-alist))))
        (insert
         "\""
         package
         "\" \"" (nth 2 (assoc tit-encode tit-encode-list))
         "\" \""
         (or title
             (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
                 (substring tit-prompt (match-beginning 1) (match-end 1))
               tit-prompt))
         "\"\n")))

    ;; Arg GUIDANCE
    (if tit-keyprompt
        ;; Walk a separate pointer: `tit-keyprompt' itself must stay
        ;; intact because it is tested again below when KBD-TRANSLATE
        ;; and SHOW-LAYOUT are generated.
        (let ((prompts tit-keyprompt))
          (insert " '(")
          (while prompts
            (indent-to 3)
            (insert (format "(%d . \"%s\")\n"
                            (string-to-char (car (car prompts)))
                            (cdr (car prompts))))
            (setq prompts (cdr prompts)))
          ;; Put the closing paren before the last newline.
          (forward-char -1)
          (insert ")")
          (forward-char 1))
      (insert " t\n"))

    ;; Arg DOCSTRING
    (insert "\"" tit-prompt "\n")
    (let ((l (nreverse tit-comments)))
      (while l
        (insert (format "%s\n" (car l)))
        (setq l (cdr l))))
    (insert "\"\n")

    ;; Arg KEY-BINDINGS
    (insert " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (forward-char -1)
    (insert ")")
    (forward-char 1)

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (insert " nil"
            (if tit-multichoice " nil" " t")
            (if tit-keyprompt " t t)\n\n" " nil nil)\n\n")))

  ;; Return the position of end of the header.
  (point-max))
268
(defun tit-process-body ()
  "Convert the TIT body starting at point into a `quail-define-rules' form.
Lines starting with `#' and empty lines are commented out with \";; \".
Each other line, KEYSEQ followed by TRANSLATIONS, becomes one rule;
directly following lines with the same KEYSEQ are merged into it and
trailing `#' comments are removed.  When `tit-phrase' is non-nil the
translations are written as a vector of strings, otherwise as a single
string.  Entries without translations are deleted."
  (message "Formatting translation rules...")
  (let ((key-regexp "\000")
        mark)
    (insert "(quail-define-rules\n")
    (while (not (eobp))
      (cond
       ((memq (following-char) '(?# ?\n))
        ;; Comment or empty line: just comment it out.
        (insert ";; ")
        (forward-line 1))
       (t
        (insert "(\"")
        (setq mark (point))
        (skip-chars-forward "^ \t")
        (setq key-regexp
              (concat (regexp-quote (buffer-substring mark (point))) "[ \t]+"))
        (save-excursion
          ;; Escape `"' and `\' which is not used for quoting the
          ;; following octal digits.
          (while (re-search-backward "\"\\|\\\\[^0-9]" mark t)
            (insert "\\")
            (forward-char -1)))
        (insert "\"")
        (skip-chars-forward " \t")

        ;; Point is now at the start of the translations.  Remember it
        ;; in MARK and pull up the following lines that have the same
        ;; key sequence, dropping trailing whitespace and `#' comments.
        ;; MARK need not be a marker because nothing before it is
        ;; modified from here on.
        (setq mark (point))
        (when (looking-at "[^ \t]*\\([ \t]*#.*\\)")
          (delete-region (match-beginning 1) (match-end 1)))
        (while (and (= (forward-line 1) 0)
                    (looking-at key-regexp))
          (let ((after-key (match-end 0)))
            (skip-chars-backward " \t\n")
            (delete-region (point) after-key)
            (when tit-phrase
              (insert " "))
            (when (looking-at "[^ \t]*\\([ \t]*#.*\\)")
              (delete-region (match-beginning 1) (match-end 1)))))

        (goto-char mark)
        (cond
         ((eolp)
          ;; This entry contains no translations.  Let's ignore it.
          (beginning-of-line)
          (setq mark (point))
          (forward-line 1)
          (delete-region mark (point)))
         (t
          ;; Modify the current line to meet the syntax of Quail package.
          (cond
           (tit-phrase
            ;; PHRASE1 PHRASE2 ... => ["PHRASE1" "PHRASE2" ...]
            (insert "[")
            (skip-chars-forward " \t")
            (while (not (eolp))
              (insert "\"")
              (skip-chars-forward "^ \t\n")
              (insert "\"")
              (skip-chars-forward " \t"))
            (insert "])"))
           (t
            ;; TRANSLATIONS => "TRANSLATIONS"
            (insert "\"")
            (end-of-line)
            (skip-chars-backward " \t")
            (insert "\")")))
          (forward-line 1))))))
    (insert ")\n")))
341
;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Convert the TIT dictionary file FILENAME into a Quail package.
The conversion is done in the buffer \"*tit-work*\", which is made to
visit the file named by `tit-make-quail-package-file-name'.  Optional
argument DIRNAME, if specified, is the directory under which the
generated Quail package is saved.

When Emacs runs noninteractively the buffer is saved; otherwise it is
displayed so that it can be modified before being saved."
  (interactive "FTIT dictionary file: ")
  (let ((work-buffer (get-buffer-create "*tit-work*")))
    (save-excursion
      ;; Load the dictionary undecoded into the work buffer.
      (set-buffer work-buffer)
      (erase-buffer)
      (let ((coding-system-for-read 'no-conversion))
        (insert-file-contents (expand-file-name filename)))
      (set-visited-file-name
       (tit-make-quail-package-file-name filename dirname) t)
      (setq enable-multibyte-characters t)
      (set-buffer-file-coding-system 'iso-2022-7bit)

      ;; Find the value of the key "ENCODE:" in the header and decode
      ;; the whole buffer with the coding system it designates.
      (let (coding-system)
        (save-excursion
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary doesn't have body part"))
          (let ((header-end (point))
                entry)
            (goto-char 1)
            (setq tit-encode
                  (if (re-search-forward "^ENCODE:[ \t]*" header-end t)
                      (progn
                        (goto-char (match-end 0))
                        (tit-read-key-value))
                    tit-default-encode))
            (setq entry (assoc tit-encode tit-encode-list))
            (unless entry
              (error "Invalid ENCODE: value in TIT dictionary"))
            (setq coding-system (nth 1 entry))))
        (message "Decoding %s..." coding-system)
        (goto-char 1)
        (decode-coding-region 1 (point-max) coding-system))

      ;; Move to the first line of the body part.
      (goto-char 1)
      (unless (search-forward "\nBEGIN" nil t)
        (error "TIT dictionary can't be decoded correctly"))
      (forward-line 1)

      ;; Convert the header with the buffer narrowed to it, continue
      ;; from the position it returns, then convert the body.
      (let ((body-start
             (save-excursion
               (save-restriction
                 (narrow-to-region 1 (point))
                 (tit-process-header filename)))))
        (goto-char body-start))
      (tit-process-body))

    (cond
     (noninteractive
      ;; Save the Quail package file.
      (save-excursion
        (set-buffer work-buffer)
        (save-buffer 0)))
     (t
      ;; Show the Quail package just generated.
      (switch-to-buffer work-buffer)
      (goto-char 1)
      (message "Save this buffer after you make any modification")))))
405
;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch'; it signals an error in an
interactive Emacs.  Each remaining argument is a TIT dictionary file or
a directory, in which case all files in it matching \"\\\\.tit$\" are
converted.  A leading \"-dir DIR\" makes DIR the directory in which the
generated Quail packages are saved.  A dictionary is converted only if it
is newer than its Quail package file, unless FORCE is non-nil.
Emacs is killed when the conversion is finished.
To get usage information, invoke
\"emacs -batch -f batch-titdic-convert -h\"."
  (defvar command-line-args-left)       ; Avoid compiler warning.
  (unless noninteractive
    (error "`batch-titdic-convert' should be used only with -batch"))
  (cond
   ((string= (car command-line-args-left) "-h")
    (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
    (message "To convert XXX.tit into DIR/xxx.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
   (t
    (let (targetdir source candidates tit-file)
      ;; An optional leading "-dir DIR" names the output directory.
      (when (string= (car command-line-args-left) "-dir")
        (setq targetdir (car (cdr command-line-args-left))
              command-line-args-left (cdr (cdr command-line-args-left))))
      (while command-line-args-left
        (setq source (expand-file-name (car command-line-args-left))
              candidates
              (if (file-directory-p source)
                  (progn
                    (message "Converting all tit files in the directory %s"
                             source)
                    (directory-files source t "\\.tit$"))
                (list source)))
        (while candidates
          (setq tit-file (expand-file-name (car candidates)))
          ;; Skip dictionaries whose package is up to date, unless FORCE.
          (when (or force
                    (file-newer-than-file-p
                     tit-file
                     (tit-make-quail-package-file-name tit-file targetdir)))
            (message "Converting %s to quail-package..." tit-file)
            (titdic-convert tit-file targetdir))
          (setq candidates (cdr candidates)))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Do byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el"))))
  (kill-emacs 0))
448
449;;; titdic-cnv.el ends here