Write fns-*.el in current directory instead of
[bpt/emacs.git] / lisp / international / titdic-cnv.el
CommitLineData
49ed466f 1;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
4ed46869
KH
5
6;; Keywords: Quail, TIT, cxterm
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869
KH
24
25;;; Comments:
26
49ed466f 27;; Convert cxterm dictionary (of TIT format) to quail-package.
4ed46869
KH
28;;
29;; Usage (within Emacs):
49ed466f 30;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR>
4ed46869 31;; Usage (from shell):
49ed466f 32;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\
4ed46869
KH
33;; [-dir DIR] [DIR | FILE] ...
34;;
35;; When you run titdic-convert within Emacs, you have a chance to
36;; modify arguments of `quail-define-package' before saving the
37;; converted file. For instance, you are likely to modify TITLE,
38;; DOCSTRING, and KEY-BINDINGS.
39
49ed466f 40;; Cxterm dictionary file (*.tit) is a line-oriented text (English,
4ed46869
KH
41;; Chinese, Japanese, and Korean) file. The whole file consists of
42;; two parts, the definition part (`header' hereafter) followed by
43;; the dictionary part (`body' hereafter). All lines beginning with
44;; a leading '#' are ignored.
45;;
46;; Each line in the header part has two fields, KEY and VALUE. These
47;; fields are separated by one or more whitespace characters.
48;;
49;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
50;; These fields are separated by one or more whitespace characters.
51;;
52;; See the manual page of `tit2cit' of cxterm distribution for more
53;; detail.
54
55;;; Code:
56
57(require 'quail)
58
49ed466f 59;; List of values of key "ENCODE:" and the corresponding Emacs
4ed46869
KH
60;; coding-system and language environment name.
;; Each element has the form (ENCODE-VALUE CODING-SYSTEM LANGUAGE-ENVIRONMENT):
;; ENCODE-VALUE is a value of the header key "ENCODE:", CODING-SYSTEM
;; is the Emacs coding system used to decode the dictionary, and
;; LANGUAGE-ENVIRONMENT is the name written into the generated package.
(defvar tit-encode-list
  (list (list "GB" 'euc-china "Chinese-GB")
        (list "BIG5" 'cn-big5 "Chinese-BIG5")
        (list "JIS" 'euc-japan "Japanese")
        (list "KS" 'euc-kr "Korean")))
66
67;; List of package names and the corresponding titles.
;; Alist of (PACKAGE-NAME . TITLE).  `tit-process-header' looks up the
;; package name it derives from the dictionary file name here (with
;; `assoc') and uses the associated TITLE as the TITLE argument of the
;; generated `quail-define-package' call; when a package has no entry,
;; a title derived from the PROMPT header value is used instead.
;; NOTE(review): the titles appear below as ISO-2022 escape sequences
;; (`\e$A...', `\e$(0...'); presumably the file on disk holds the
;; encoded characters themselves -- confirm before re-encoding or
;; retyping any of these strings.
68(defvar quail-cxterm-package-title-alist
69 '(("chinese-4corner" . "\e$(0(?-F\e(B")
70 ("chinese-array30" . "\e$(0#R#O\e(B")
71 ("chinese-ccdospy" . "\e$AKuF4\e(B")
72 ("chinese-ctlau" . "\e$AAuTA\e(B")
73 ("chinese-ctlaub" . "\e$(0N,Gn\e(B")
74 ("chinese-ecdict" . "\e$(05CKH\e(B")
75 ("chinese-etzy" . "\e$(06/0D\e(B")
76 ("chinese-punct-b5" . "\e$(0O:\e(BB")
77 ("chinese-punct" . "\e$A1j\e(BG")
78 ("chinese-py-b5" . "\e$(03<\e(BB")
79 ("chinese-py" . "\e$AF4\e(BG")
80 ("chinese-qj-b5" . "\e$(0)A\e(BB")
81 ("chinese-qj" . "\e$AH+\e(BG")
82 ("chinese-sw" . "\e$AJWN2\e(B")
83 ("chinese-tonepy" . "\e$A5wF4\e(B")
84 ("chinese-ziranma" . "\e$AK+F4\e(B")
85 ("chinese-zozy" . "\e$(0I\0D\e(B")))
4ed46869
KH
86
87;; Return a value of the key in the current line.
(defsubst tit-read-key-value ()
  "Return the header value that starts at point, or nil if there is none.
The value is the run of characters up to the next space, tab or
newline.  It is wrapped in double quotes and read as a Lisp string, so
backslash escapes in it are decoded by the Lisp reader.  Point is not
moved."
  (when (looking-at "[^ \t\n]+")
    (let ((quoted (concat "\"" (match-string 0) "\"")))
      (car (read-from-string quoted)))))
91
92;; Return an appropriate quail-package filename from FILENAME (TIT
49ed466f
KH
93;; dictionary filename). For instance, ".../ZOZY.tit" -> "ZOZY.el".
(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name for the TIT dictionary FILENAME.
The directory part and the extension of FILENAME are dropped and
\".el\" is appended; for instance \".../ZOZY.tit\" gives \"ZOZY.el\".
The result is expanded relative to DIRNAME, or to `default-directory'
when DIRNAME is nil or omitted."
  (expand-file-name
   ;; Strip the real extension instead of blindly chopping the last
   ;; four characters: a name shorter than four characters used to
   ;; signal `args-out-of-range', and a name with another extension
   ;; used to be truncated in the wrong place.
   (concat (file-name-sans-extension (file-name-nondirectory filename))
           ".el")
   dirname))
98
1375754c
KH
99;; This value is nil if we are processing phrase dictionary.
;; `tit-dictionary' is assigned with `setq' while the header is being
;; processed, so it is defined as a variable rather than a constant.
(defvar tit-dictionary t
  "Nil while a phrase dictionary is being processed, non-nil otherwise.
Set by `tit-process-header' when it sees BEGINDICTIONARY or BEGINPHRASE.")

(defvar tit-encode nil
  "Value of the key \"ENCODE:\" of the dictionary being converted.
Set by `titdic-convert'; looked up in `tit-encode-list'.")

(defvar tit-default-encode "GB"
  "Value used for `tit-encode' when the header has no \"ENCODE:\" line.")
103
104;; Generate elements of KEY-BINDINGS arg for `quail-define-package' so
105;; that each character in KEYS invokes FUNCTION-SYMBOL.
(defun tit-generate-key-bindings (keys function-symbol)
  "Print KEY-BINDINGS elements that bind each character of KEYS.
One element of the form (\"KEY\" . FUNCTION-SYMBOL) is printed to
`standard-output' for each character of the string KEYS that
qualifies.  A control character qualifies only when
`quail-translation-keymap' binds it to
`quail-execute-non-quail-command'; any other character qualifies when
its code is at most 127.  Control characters are written as \"\\C-X\"
and code 127 as \"\\C-?\"."
  (let ((count (length keys))
        (idx 0)
        (nothing-printed t)
        ch)
    (while (< idx count)
      ;; Once an element has been printed, every later character --
      ;; qualifying or not -- is preceded by a line break.
      (unless nothing-printed
        (princ "\n "))
      (setq ch (aref keys idx)
            idx (1+ idx))
      (when (if (< ch ?\ )
                (eq (lookup-key quail-translation-keymap
                                (char-to-string ch))
                    'quail-execute-non-quail-command)
              (<= ch 127))
        (princ (cons (cond ((< ch ?\ ) (format "\"\\C-%c\"" (+ ch ?@)))
                           ((< ch 127) (format "\"%c\"" ch))
                           (t "\"\\C-?\""))
                     function-symbol))
        (setq nothing-printed nil)))))
126
127;; Analyze header part of TIT dictionary and generate an appropriate
128;; `quail-define-package' function call.
(defun tit-process-header (filename)
  "Print the Quail package header for the TIT dictionary FILENAME.
The accessible portion of the current buffer is scanned from its
beginning to its end as the header part of the dictionary.  Each
header line is copied to `standard-output' as a comment while the
values of the keys COMMENT, MULTICHOICE, MOVERIGHT, MOVELEFT, PROMPT,
BACKSPACE and KEYPROMPT are collected; BEGINDICTIONARY and
BEGINPHRASE set `tit-dictionary'.  A `quail-define-package' call
built from the collected values is then printed.  FILENAME is used
only to derive the package name and to name the original file in the
generated comments."
  (message "Processing header part...")
  (goto-char (point-min))

  ;; At first, generate header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
                  "chinese-"
                  ;; Strip the real extension rather than the last four
                  ;; characters, so that short names and names with a
                  ;; different extension are handled too.
                  (file-name-sans-extension
                   (downcase (file-name-nondirectory filename)))))
        ;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package)
    (princ "' generated by the command `titdic-convert'\n;;\tDate: ")
    (princ (current-time-string))
    (princ "\n;;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n;;; Comment:\n\n")
    (princ ";; Byte-compile this file again after any modification.\n\n")
    (princ ";;; Start of the header of original TIT dictionary.\n\n")

    ;; Dispatch on the first character of each line before trying the
    ;; full keyword match.
    (while (not (eobp))
      (let ((ch (following-char))
            (pos (point)))
        (cond ((= ch ?C)                ; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((pos (match-end 0)))
                        (end-of-line)
                        (setq tit-comments (cons (buffer-substring pos (point))
                                                 tit-comments))))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-prompt (tit-read-key-value)))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-dictionary t))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-dictionary nil))))
              ((= ch ?K)                ; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; A key written as a backslash escape is
                        ;; decoded by reading it as a Lisp string.
                        (if (string-match "\\\\[0-9]+" key-char)
                            (setq key-char
                                  (car (read-from-string (format "\"%s\""
                                                                 key-char)))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt)))))))
        ;; Copy the whole original line into the output as a comment.
        (end-of-line)
        (princ ";; ")
        (princ (buffer-substring pos (point)))
        (princ "\n")
        (forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (cdr (assoc package quail-cxterm-package-title-alist))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      ;; NOTE(review): the regexp below is reproduced exactly as it
      ;; appears in this copy of the file, ISO-2022 escape sequences
      ;; included -- confirm the encoding when merging.
      (princ (or title
                 (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
                     (substring tit-prompt (match-beginning 1) (match-end 1))
                   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    (if tit-keyprompt
        ;; Walk the list with a local cursor.  `tit-keyprompt' itself
        ;; must stay intact because it is tested again below to decide
        ;; the KBD-TRANSLATE and SHOW-LAYOUT arguments; consuming it
        ;; here made that test always fail.
        (let ((rest tit-keyprompt))
          (princ " '(")
          (while rest
            (princ " ")
            (princ (format "(%d . \"%s\")\n"
                           (string-to-char (car (car rest)))
                           (cdr (car rest))))
            (setq rest (cdr rest)))
          (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (prin1
     (mapconcat 'identity (cons tit-prompt (nreverse tit-comments)) "\n"))
    (terpri)

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
259
(defsubst tit-flush-translations (key translations)
  "Print one translation rule for KEY and TRANSLATIONS, then a newline.
Each backslash followed by three digits in KEY is first replaced by
what the Lisp reader yields for that escape inside a string.  The rule
is printed with `prin1' as a two-element list: KEY, followed by
TRANSLATIONS itself when `tit-dictionary' is non-nil, or by a vector
of the reversed TRANSLATIONS list otherwise."
  (let ((escape-re "\\\\[0-9][0-9][0-9]"))
    ;; Leave KEY untouched unless it contains at least one escape.
    (when (string-match escape-re key)
      (let ((decoded "")
            (start 0))
        (while (string-match escape-re key start)
          (setq decoded (concat decoded
                                (substring key start (match-beginning 0))
                                (car (read-from-string
                                      (concat "\"" (match-string 0 key) "\""))))
                start (match-end 0)))
        ;; Append whatever follows the last escape.
        (setq key (concat decoded (substring key start))))))
  (prin1 (list key
               (if tit-dictionary
                   translations
                 (vconcat (nreverse translations)))))
  (princ "\n"))
4ed46869
KH
277
278;; Convert body part of TIT dictionary into `quail-define-rules'
279;; function call.
(defun tit-process-body ()
  "Print a `quail-define-rules' call for the body of a TIT dictionary.
The current buffer is scanned from point to its end.  Empty lines and
lines starting with `#' are skipped, and so are entries that have a
key but no translations.  Translations of consecutive lines with the
same key are accumulated and written as one rule with
`tit-flush-translations'.  When `tit-dictionary' is non-nil the first
translation field of each line is appended to a single string;
otherwise every field of the line up to a `#' is collected as a
separate phrase."
  (message "Formatting translation rules...")
  (let ((prev-key "")
        ch key translations pos)
    (princ "(quail-define-rules\n")
    (while (null (eobp))
      (setq ch (following-char))
      (if (or (= ch ?#) (= ch ?\n))
          (forward-line 1)
        (setq pos (point))
        (skip-chars-forward "^ \t\n")
        (setq key (buffer-substring pos (point)))
        (skip-chars-forward " \t")
        (setq ch (following-char))
        ;; `eolp' rather than a comparison with ?\n: it is also true at
        ;; the end of the buffer, where `following-char' returns 0.  A
        ;; key-only last line without a final newline would otherwise
        ;; produce a rule with an empty translation.
        (if (or (= ch ?#) (eolp))
            ;; This entry contains no translations.  Let's ignore it.
            (forward-line 1)
          ;; A new key starts: write out what was collected for the
          ;; previous one.
          (or (string= key prev-key)
              (progn
                (if translations
                    (tit-flush-translations prev-key translations))
                (setq translations nil
                      prev-key key)))
          (if tit-dictionary
              (progn
                (setq pos (point))
                (skip-chars-forward "^ \t#\n")
                (setq translations
                      (if translations
                          (concat translations
                                  (buffer-substring pos (point)))
                        (buffer-substring pos (point)))))
            (while (not (eolp))
              (setq pos (point))
              (skip-chars-forward "^ \t\n")
              (setq translations (cons (buffer-substring pos (point))
                                       translations))
              (skip-chars-forward " \t")
              (setq ch (following-char))
              ;; The rest of the line is a comment.
              (if (= ch ?#) (end-of-line))))
          (forward-line 1))))

    (if translations
        (tit-flush-translations prev-key translations))
    (princ ")\n")))
4ed46869
KH
327
328;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Convert the TIT dictionary file FILENAME into a Quail package file.
The package is written to the file named by
`tit-make-quail-package-file-name'; the optional argument DIRNAME, if
given, is the directory under which that file is saved."
  (interactive "FTIT dictionary file: ")
  (with-temp-file (tit-make-quail-package-file-name filename dirname)
    (set-buffer-file-coding-system 'iso-2022-7bit)
    ;; Everything the helpers print goes into the output file's buffer.
    (let ((standard-output (current-buffer)))
      (with-temp-buffer
        ;; Read the dictionary without conversion; it is decoded below,
        ;; once the "ENCODE:" value is known.
        (let ((coding-system-for-read 'no-conversion))
          (insert-file-contents (expand-file-name filename)))
        (set-buffer-multibyte t)

        ;; Decode the buffer contents from the encoding specified by
        ;; the value of the key "ENCODE:".
        (unless (search-forward "\nBEGIN" nil t)
          (error "TIT dictionary doesn't have body part"))
        (let ((header-end (point))
              entry)
          (goto-char (point-min))
          (setq tit-encode
                (if (re-search-forward "^ENCODE:[ \t]*" header-end t)
                    (progn
                      (goto-char (match-end 0))
                      (tit-read-key-value))
                  tit-default-encode))
          (setq entry (assoc tit-encode tit-encode-list))
          (unless entry
            (error "Invalid ENCODE: value in TIT dictionary"))
          (let ((coding-system (nth 1 entry)))
            (message "Decoding with coding system %s..." coding-system)
            (goto-char (point-min))
            (decode-coding-region (point-min) (point-max) coding-system)))

        ;; Move point to the starting position of the body part.
        (goto-char (point-min))
        (unless (search-forward "\nBEGIN" nil t)
          (error "TIT dictionary can't be decoded correctly"))

        ;; Process the header part: everything up to and including
        ;; the BEGIN... line.
        (forward-line 1)
        (narrow-to-region (point-min) (point))
        (tit-process-header filename)
        (widen)

        ;; Process the body part.  For speed, the multibyte facility
        ;; is turned off in both the output and the input buffer.
        (with-current-buffer standard-output
          (set-buffer-multibyte nil))
        (set-buffer-multibyte nil)
        (tit-process-body)))))
4ed46869
KH
378
379;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
This is meant to be run from the command line with `-batch'; it
signals an error in an interactive Emacs.  Each remaining argument is
a TIT dictionary file or a directory whose \"*.tit\" files are all
converted.  A leading \"-dir DIR\" argument names the directory in
which the generated files are saved.  A file is converted only when
it is newer than its Quail package file, unless FORCE is non-nil.
Invoke \"emacs -batch -f batch-titdic-convert -h\" for usage.
Emacs is killed when the function is done."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (unless noninteractive
    (error "`batch-titdic-convert' should be used only with -batch"))
  (if (string= (car command-line-args-left) "-h")
      (progn
        (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
        (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
        (message "To convert XXX.tit into DIR/xxx.el:")
        (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
    (let (targetdir)
      ;; Consume an optional leading "-dir DIR" pair.
      (when (string= (car command-line-args-left) "-dir")
        (setq targetdir (car (cdr command-line-args-left))
              command-line-args-left (cdr (cdr command-line-args-left))))
      (while command-line-args-left
        (let* ((arg (expand-file-name (car command-line-args-left)))
               ;; A directory argument stands for all its *.tit files.
               (pending (if (file-directory-p arg)
                            (progn
                              (message "Converting all tit files in the directory %s" arg)
                              (directory-files arg t "\\.tit$"))
                          (list arg))))
          (while pending
            (let ((file (expand-file-name (car pending))))
              (when (or force
                        (file-newer-than-file-p
                         file
                         (tit-make-quail-package-file-name file targetdir)))
                (message "Converting %s to quail-package..." file)
                (titdic-convert file targetdir)))
            (setq pending (cdr pending))))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el")))
  (kill-emacs 0))
421
422;;; titdic-cnv.el ends here