Add 2011 to FSF/AIST copyright years.
[bpt/emacs.git] / lisp / international / titdic-cnv.el
CommitLineData
c7211fed 1;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*-
4ed46869 2
d4877ac1 3;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004,
5df4f04c 4;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 6;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
2fd125a3
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
8f924df7
KH
9;; Copyright (C) 2003
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
4ed46869
KH
12
13;; Keywords: Quail, TIT, cxterm
14
15;; This file is part of GNU Emacs.
16
4936186e 17;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 18;; it under the terms of the GNU General Public License as published by
4936186e
GM
19;; the Free Software Foundation, either version 3 of the License, or
20;; (at your option) any later version.
4ed46869
KH
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
4936186e 28;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869 29
60370d40 30;;; Commentary:
4ed46869 31
49ed466f 32;; Convert cxterm dictionary (of TIT format) to quail-package.
4ed46869
KH
33;;
34;; Usage (within Emacs):
49ed466f 35;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR>
4ed46869 36;; Usage (from shell):
49ed466f 37;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\
4ed46869
KH
38;; [-dir DIR] [DIR | FILE] ...
39;;
40;; When you run titdic-convert within Emacs, you have a chance to
41;; modify arguments of `quail-define-package' before saving the
42;; converted file. For instance, you are likely to modify TITLE,
43;; DOCSTRING, and KEY-BINDINGS.
44
49ed466f 45;; Cxterm dictionary file (*.tit) is a line-oriented text (English,
4ed46869
KH
46;; Chinese, Japanese, and Korean) file. The whole file consists of
47;; two parts, the definition part (`header' hereafter) followed by
48;; the dictionary part (`body' hereafter). All lines beginning with
49;; a leading '#' are ignored.
50;;
51;; Each line in the header part has two fields, KEY and VALUE. These
52;; fields are separated by one or more white characters.
53;;
54;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
55;; These fields are separated by one or more white characters.
56;;
57;; See the manual page of `tit2cit' of cxterm distribution for more
58;; detail.
c063e381 59;;
b138056a 60;; Near the end of this file, we also have a few other tools to convert
c063e381 61;; miscellaneous dictionaries.
4ed46869
KH
62
63;;; Code:
64
65(require 'quail)
66
49ed466f 67;; List of values of key "ENCODE:" and the corresponding Emacs
4ed46869
KH
68;; coding-system and language environment name.
;; Each element has the form (ENCODE-VALUE CODING-SYSTEM LANG-ENV):
;;   ENCODE-VALUE  -- string compared (with `assoc') against the value
;;                    of the "ENCODE:" header line of a TIT dictionary;
;;   CODING-SYSTEM -- symbol used to decode the dictionary contents;
;;   LANG-ENV      -- string printed as the LANGUAGE argument of the
;;                    generated `quail-define-package' call.
69(defvar tit-encode-list
a7f2c216
KH
70 '(("GB" euc-china "Chinese-GB")
71 ("BIG5" cn-big5 "Chinese-BIG5")
72 ("JIS" euc-japan "Japanese")
49ed466f
KH
73 ("KS" euc-kr "Korean")))
74
4558e816
KH
75;; Alist of input method names and the corresponding title and extra
76;; docstring. For each input method generated from a TIT dictionary,
77;; a docstring is automatically generated from the comments in the
78;; dictionary. The extra docstring in this alist is to add more
79;; information.
80;; The command describe-input-method shows the automatically generated
c7211fed 81;; docstring, then an extra docstring while replacing the form \<VAR>
4558e816
KH
82;; by the value of variable VAR. For instance, the form
83;; \<quail-translation-docstring> is replaced by a description about
84;; how to select a translation from a list of candidates.
85
6b1e079c
KH
;; Each element has the form (PACKAGE-NAME TITLE [EXTRA-DOCSTRING]).
;; `tit-process-header' looks the generated package name up here with
;; `assoc'; element 1 becomes the package title and element 2, when
;; present, is appended to the docstring built from the dictionary.
;; Entries with only two elements contribute a title and nothing else.
;;
;; NOTE(review): some docstrings below contain the misspellings
;; "correponding" and "specifing".  They are runtime text shown to the
;; user and are left untouched here; fix them in a separate change.
86(defvar quail-cxterm-package-ext-info
87 '(("chinese-4corner" "\e$(0(?-F\e(B")
88 ("chinese-array30" "\e$(0#R#O\e(B")
4558e816
KH
89 ("chinese-ccdospy" "\e$AKuF4\e(B"
90 "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312').
91
c7211fed 92Pinyin is the standard Roman transliteration method for Chinese.
4558e816
KH
93For the detail of Pinyin system, see the documentation of the input
94method `chinese-py'.
95
96This input method works almost the same way as `chinese-py'. The
97difference is that you type a single key for these Pinyin spelling.
98 Pinyin: zh en eng ang ch an ao ai ong sh ing yu(\e$A(9\e(B)
99 keyseq: a f g h i j k l s u y v
c7211fed 100For example:
4558e816
KH
101 Chinese: \e$A0!\e(B \e$A9{\e(B \e$AVP\e(B \e$AND\e(B \e$A9b\e(B \e$ASq\e(B \e$AH+\e(B
102 Pinyin: a guo zhong wen guang yu quan
103 Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6
104
105\\<quail-translation-docstring>
106
107For double-width GB2312 characters correponding to ASCII, use the
108input method `chinese-qj'.")
109
4558e816 110 ("chinese-ecdict" "\e$(05CKH\e(B"
c0943d3d 111"In this input method, you enter a Chinese (Big5) character or word
4558e816
KH
112by typing the corresponding English word. For example, if you type
113\"computer\", \"\e$(0IZH+\e(B\" is input.
114
115\\<quail-translation-docstring>")
116
117 ("chinese-etzy" "\e$(06/0D\e(B"
118"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
119`chinese-big5-2').
120
121Zhuyin is a kind of phonetic symbol. One to three Zhuyin symbols
122compose one Chinese character.
123
124In this input method, you enter a Chinese character by first typing
125keys corresponding to Zhuyin symbols (see the above table) followed by
126SPC, 1, 2, 3, or 4 specifing a tone (SPC:\e$(0?v(N\e(B, 1:\e$(0M=Vy\e(B, 2:\e$(0Dm(N\e(B, 3: \e$(0&9Vy\e(B,
1274:\e$(0(+Vy\e(B).
128
129\\<quail-translation-docstring>")
6b1e079c
KH
130
131 ("chinese-punct-b5" "\e$(0O:\e(BB"
132 "Input method for Chinese punctuations and symbols of Big5
133\(`chinese-big5-1' and `chinese-big5-2').")
134
135 ("chinese-punct" "\e$A1j\e(BG"
136 "Input method for Chinese punctuations and symbols of GB2312
137\(`chinese-gb2312').")
138
139 ("chinese-py-b5" "\e$(03<\e(BB"
140 "Pinyin base input method for Chinese Big5 characters
141\(`chinese-big5-1', `chinese-big5-2').
142
143This input method works almost the same way as `chinese-py' (which
144see).
145
146This input method supports only Han characters. The more convenient
43b11fee
EZ
147method is `chinese-py-punct-b5', which is the combination of this
148method and `chinese-punct-b5' and which supports both Han characters
149and punctuation/symbols.
6b1e079c 150
43b11fee 151For double-width Big5 characters corresponding to ASCII, use the input
6b1e079c
KH
152method `chinese-qj-b5'.
153
154The input method `chinese-py' and `chinese-tonepy' are also Pinyin
43b11fee 155based, but for the character set GB2312 (`chinese-gb2312').")
6b1e079c 156
4558e816
KH
157 ("chinese-qj-b5" "\e$(0)A\e(BB")
158
159 ("chinese-qj" "\e$AH+\e(BG")
160
6b1e079c 161 ("chinese-sw" "\e$AJWN2\e(B"
4558e816
KH
162"Radical base input method for Chinese charset GB2312 (`chinese-gb2312').
163
d20faceb
EZ
164In this input method, you enter a Chinese character by typing two
165keys. The first key corresponds to the first (\e$AJW\e(B) radical, the second
166key corresponds to the last (\e$AN2\e(B) radical. The correspondence of keys
167and radicals is as below:
4558e816
KH
168
169 first radical:
170 a b c d e f g h i j k l m n o p q r s t u v w x y z
a1506d29 171 \e$APD\e(B \e$AZ"\e(B \e$AJ,\e(B \e$AX<\e(B \e$A;p\e(B \e$A?Z\e(B \e$A^P\e(B \e$Ac_\e(B \e$AZ%\e(B \e$A\3\e(B \e$AXi\e(B \e$AD>\e(B \e$Alj\e(B \e$Ab;\e(B \e$ATB\e(B \e$Afy\e(B \e$AJ/\e(B \e$AMu\e(B \e$A0K\e(B \e$AX/\e(B \e$AHU\e(B \e$AeA\e(B \e$Aak\e(B \e$AVq\e(B \e$AR;\e(B \e$AHK\e(B
4558e816
KH
172 last radical:
173 a b c d e f g h i j k l m n o p q r s t u v w x y z
a1506d29 174 \e$ASV\e(B \e$AI=\e(B \e$AMA\e(B \e$A56\e(B \e$AZb\e(B \e$A?Z\e(B \e$ARB\e(B \e$Aqb\e(B \e$A4s\e(B \e$A6!\e(B \e$A[L\e(B \e$Ala\e(B \e$AJ.\e(B \e$A4u\e(B \e$AXg\e(B \e$ACE\e(B \e$A=q\e(B \e$AX-\e(B \e$AE.\e(B \e$ARR\e(B \e$A`m\e(B \e$AP!\e(B \e$A3'\e(B \e$A3f\e(B \e$A_.\e(B \e$A27\e(B
4558e816 175
43e5a7fe 176\\<quail-translation-docstring>")
4558e816 177
6b1e079c
KH
178 ("chinese-tonepy" "\e$A5wF4\e(B"
179 "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
180
c3ff164a 181Pinyin is the standard roman transliteration method for Chinese.
d20faceb 182For the details of Pinyin system, see the documentation of the input
6b1e079c
KH
183method `chinese-py'.
184
185This input method works almost the same way as `chinese-py'. The
4558e816
KH
186difference is that you must type 1..5 after each Pinyin spelling to
187specify a tone (1:\e$ARuF=\e(B, 2:\e$AQtF=\e(B, 3:\e$AIOIy\e(B, 4\e$AOBIy\e(B, 5:\e$AGaIy\e(B).
188
43e5a7fe 189\\<quail-translation-docstring>
4558e816
KH
190
191For instance, to input \e$ADc\e(B, you type \"n i 3 3\", the first \"n i\" is
192a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
193the third character from the candidate list.
6b1e079c
KH
194
195For double-width GB2312 characters correponding to ASCII, use the
196input method `chinese-qj'.")
197
4558e816
KH
198 ("chinese-zozy" "\e$(0I\0D\e(B"
199"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
200`chinese-big5-2').
201
d20faceb 202Zhuyin is a kind of a phonetic symbol. One to three Zhuyin symbols
4558e816
KH
203compose a Chinese character.
204
205In this input method, you enter a Chinese character by first typing
206keys corresponding to Zhuyin symbols (see the above table) followed by
207SPC, 6, 3, 4, or 7 specifing a tone (SPC:\e$(0?v(N\e(B, 6:\e$(0Dm(N\e(B, 3:\e$(0&9Vy\e(B, 4:\e$(0(+Vy\e(B,
2087:\e$(0M=Vy\e(B).
209
43e5a7fe 210\\<quail-translation-docstring>")))
4ed46869
KH
211
212;; Return a value of the key in the current line.
;; Matches the run of non-whitespace characters starting at point
;; (point itself is not moved; `looking-at' only sets the match data).
;; The matched text is wrapped in double quotes and handed to
;; `read-from-string', so backslash escapes in the TIT value are
;; interpreted the way the Lisp reader interprets them in a string.
;; Returns nil when nothing matches (point at whitespace or at end of
;; line), because the `if' has no else branch.
213(defsubst tit-read-key-value ()
6b61353c 214 (if (looking-at "[^ \t\r\n]+")
4ed46869
KH
215 (car (read-from-string (concat "\"" (match-string 0) "\"")))))
216
217;; Return an appropriate quail-package filename from FILENAME (TIT
49ed466f
KH
218;; dictionary filename). For instance, ".../ZOZY.tit" -> "ZOZY.el".
;; The last four characters of FILENAME are dropped unconditionally by
;; `substring' (presumably the ".tit" extension -- the code does not
;; check), the directory part is discarded, and ".el" is appended.
;; The letter case of the base name is kept as is.  The result is
;; expanded relative to DIRNAME via `expand-file-name'.
;; NOTE(review): a FILENAME shorter than four characters would make
;; `substring' signal an error.
219(defun tit-make-quail-package-file-name (filename &optional dirname)
4ed46869 220 (expand-file-name
49ed466f 221 (concat (file-name-nondirectory (substring filename 0 -4)) ".el")
4ed46869
KH
222 dirname))
223
1375754c 224;; This value is nil if we are processing phrase dictionary.
;; `tit-dictionary' is set by `tit-process-header': t after a
;; BEGINDICTIONARY line, nil after a BEGINPHRASE line.
8c1ccc6c 225(defvar tit-dictionary t)
4ed46869
KH
;; `tit-encode' holds the value of the dictionary's "ENCODE:" header
;; line; `titdic-convert' sets it, falling back to
;; `tit-default-encode' when the header has no such line.
226(defvar tit-encode nil)
227(defvar tit-default-encode "GB")
228
229;; Generate elements of KEY-BINDINGS arg for `quail-define-package' so
230;; that each character in KEYS invokes FUNCTION-SYMBOL.
;; KEYS is a string of key characters; FUNCTION-SYMBOL is the command
;; symbol to pair with each of them.  Nothing useful is returned: the
;; function works by printing, with `princ', one cons of the form
;; ("KEY" . FUNCTION-SYMBOL) per accepted character to
;; `standard-output'.
231(defun tit-generate-key-bindings (keys function-symbol)
232 (let ((len (length keys))
233 (i 0)
1375754c 234 (first t)
4ed46869
KH
235 key)
236 (while (< i len)
  ;; Once one binding has been printed, a newline separator is
  ;; emitted at the start of every later iteration, whether or not
  ;; that iteration's key ends up being printed.
1375754c 237 (or first (princ "\n "))
4ed46869 238 (setq key (aref keys i))
1375754c
KH
  ;; A control character (below SPC) is accepted only when
  ;; `quail-translation-keymap' binds it to
  ;; `quail-execute-non-quail-command'; any other character is
  ;; accepted only when its code is <= 127.
239 (if (if (< key ?\ )
240 (eq (lookup-key quail-translation-keymap
241 (char-to-string key))
4ed46869 242 'quail-execute-non-quail-command)
1375754c
KH
243 (<= key 127))
244 (progn
  ;; Control characters are written in "\C-X" notation (code +
  ;; ?@), printable ones literally, and 127 as "\C-?".
245 (princ (cons (cond ((< key ?\ ) (format "\"\\C-%c\"" (+ key ?@)))
246 ((< key 127) (format "\"%c\"" key))
247 (t "\"\\C-?\""))
248 function-symbol))
249 (setq first nil)))
4ed46869
KH
250 (setq i (1+ i)))))
251
252;; Analyze header part of TIT dictionary and generate an appropriate
253;; `quail-define-package' function call.
;; FILENAME is the TIT dictionary file name.  Only its non-directory
;; part is used: to derive the package name ("chinese-" followed by
;; the downcased base name minus its last four characters) and for the
;; header comment of the generated file.
;;
;; The scan runs from `point-min' to the end of the accessible region,
;; so the caller is expected to have narrowed the buffer to the header
;; part; point is left at the end of that region.  All output is
;; written with `princ'/`prin1' to `standard-output'.
;;
;; Besides producing output, this sets the global `tit-dictionary'
;; according to whether a BEGINDICTIONARY or a BEGINPHRASE line is
;; seen, and it reads the globals `tit-encode' and
;; `coding-system-for-write'.
254(defun tit-process-header (filename)
255 (message "Processing header part...")
256 (goto-char (point-min))
257
1375754c
KH
258 ;; At first, generate header part of the Quail package while
259 ;; collecting information from the original header.
260 (let ((package (concat
261 "chinese-"
262 (substring (downcase (file-name-nondirectory filename))
263 0 -4)))
264 ;; TIT keywords and the corresponding default values.
4ed46869
KH
265 (tit-multichoice t)
266 (tit-prompt "")
267 (tit-comments nil)
268 (tit-backspace "\010\177")
269 (tit-deleteall "\015\025")
270 (tit-moveright ".>")
271 (tit-moveleft ",<")
272 (tit-keyprompt nil))
1375754c
KH
273
274 (princ ";; Quail package `")
98223b73 275 (princ package)
bb5b9e9d 276 (princ (format "' -*- coding:%s; " coding-system-for-write))
7d15a839 277 (princ "byte-compile-disable-print-circle:t; -*-\n")
c7211fed 278 (princ ";; Generated by the command `titdic-convert'\n;;\tDate: ")
1375754c
KH
279 (princ (current-time-string))
280 (princ "\n;;\tOriginal TIT dictionary file: ")
281 (princ (file-name-nondirectory filename))
282 (princ "\n\n;;; Comment:\n\n")
86e4f7c0 283 (princ ";; Byte-compile this file again after any modification.\n\n")
1375754c
KH
284 (princ ";;; Start of the header of original TIT dictionary.\n\n")
285
  ;; Walk the header line by line.  The first character of each line
  ;; selects which keywords to test for; whatever the line contains,
  ;; it is also copied into the output as a ";; " comment.
4ed46869 286 (while (not (eobp))
1375754c
KH
287 (let ((ch (following-char))
288 (pos (point)))
4ed46869
KH
289 (cond ((= ch ?C) ; COMMENT
290 (cond ((looking-at "COMMENT")
  ;; This inner `pos' shadows the line-start `pos' only within
  ;; this branch; it marks the text following the keyword.
291 (let ((pos (match-end 0)))
292 (end-of-line)
8b735b2b
KH
293 (setq tit-comments
294 (cons (buffer-substring-no-properties pos (point))
295 tit-comments))))))
4ed46869
KH
296 ((= ch ?M) ; MULTICHOICE, MOVERIGHT, MOVELEFT
297 (cond ((looking-at "MULTICHOICE:[ \t]*")
298 (goto-char (match-end 0))
299 (setq tit-multichoice (looking-at "YES")))
300 ((looking-at "MOVERIGHT:[ \t]*")
301 (goto-char (match-end 0))
302 (setq tit-moveright (tit-read-key-value)))
303 ((looking-at "MOVELEFT:[ \t]*")
304 (goto-char (match-end 0))
305 (setq tit-moveleft (tit-read-key-value)))))
306 ((= ch ?P) ; PROMPT
307 (cond ((looking-at "PROMPT:[ \t]*")
308 (goto-char (match-end 0))
6b1e079c
KH
  ;; NOTE(review): when the PROMPT: value is missing,
  ;; `tit-read-key-value' returns nil and the `aref' below
  ;; would signal an error -- confirm that such headers
  ;; cannot occur.
309 (setq tit-prompt (tit-read-key-value))
310 ;; Some TIT dictionaries that are encoded by
311 ;; euc-china contain an invalid character at the tail.
312 (let* ((last (aref tit-prompt (1- (length tit-prompt))))
313 (split (split-char last)))
314 (if (or (eq (nth 1 split) 32)
315 (eq (nth 2 split) 32))
316 (setq tit-prompt (substring tit-prompt 0 -1)))))))
4ed46869
KH
317 ((= ch ?B) ; BACKSPACE, BEGINDICTIONARY,
318 ; BEGINPHRASE
319 (cond ((looking-at "BACKSPACE:[ \t]*")
320 (goto-char (match-end 0))
321 (setq tit-backspace (tit-read-key-value)))
322 ((looking-at "BEGINDICTIONARY")
1375754c 323 (setq tit-dictionary t))
4ed46869 324 ((looking-at "BEGINPHRASE")
1375754c 325 (setq tit-dictionary nil))))
4ed46869
KH
326 ((= ch ?K) ; KEYPROMPT
327 (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
328 (let ((key-char (match-string 1)))
329 (goto-char (match-end 0))
1fa1e1f5
RS
  ;; A key written as a backslash-digits escape is turned
  ;; into the character it denotes by reading it as a
  ;; Lisp string.
330 (if (string-match "\\\\[0-9]+" key-char)
331 (setq key-char
332 (car (read-from-string (format "\"%s\""
333 key-char)))))
4ed46869
KH
334 (setq tit-keyprompt
335 (cons (cons key-char (tit-read-key-value))
1375754c
KH
336 tit-keyprompt)))))))
337 (end-of-line)
338 (princ ";; ")
8b735b2b 339 (princ (buffer-substring-no-properties pos (point)))
1375754c
KH
340 (princ "\n")
341 (forward-line 1)))
a1506d29 342
1375754c
KH
343 (princ "\n;;; End of the header of original TIT dictionary.\n\n")
344 (princ ";;; Code:\n\n(require 'quail)\n\n")
345
346 (princ "(quail-define-package ")
347 ;; Args NAME, LANGUAGE, TITLE
6b1e079c 348 (let ((title (nth 1 (assoc package quail-cxterm-package-ext-info))))
1375754c
KH
349 (princ "\"")
350 (princ package)
351 (princ "\" \"")
352 (princ (nth 2 (assoc tit-encode tit-encode-list)))
353 (princ "\" \"")
  ;; Without a predefined title, fall back to the part of the prompt
  ;; captured by the regexp below, or to the whole prompt when the
  ;; regexp does not match.
354 (princ (or title
355 (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
356 (substring tit-prompt (match-beginning 1) (match-end 1))
357 tit-prompt)))
358 (princ "\"\n"))
4ed46869
KH
359
360 ;; Arg GUIDANCE
  ;; The KEYPROMPT entries are printed as an alist of
  ;; (CHAR-CODE . "PROMPT").  The loop pops entries off
  ;; `tit-keyprompt', so the variable is nil afterwards.  When there
  ;; are no entries, the guidance argument is written as t.
361 (if tit-keyprompt
362 (progn
1375754c 363 (princ " '(")
4ed46869 364 (while tit-keyprompt
1375754c
KH
365 (princ " ")
366 (princ (format "(%d . \"%s\")\n"
367 (string-to-char (car (car tit-keyprompt)))
368 (cdr (car tit-keyprompt))))
4ed46869 369 (setq tit-keyprompt (cdr tit-keyprompt)))
1375754c
KH
370 (princ ")"))
371 (princ " t\n"))
4ed46869
KH
372
373 ;; Arg DOCSTRING
  ;; The docstring is the prompt, then the collected COMMENT lines in
  ;; their original order, then any extra text registered for this
  ;; package in `quail-cxterm-package-ext-info'.
6b1e079c
KH
374 (let ((doc (concat tit-prompt "\n"))
375 (comments (if tit-comments
376 (mapconcat 'identity (nreverse tit-comments) "\n")))
377 (doc-ext (nth 2 (assoc package quail-cxterm-package-ext-info))))
378 (if comments
379 (setq doc (concat doc "\n" comments "\n")))
380 (if doc-ext
381 (setq doc (concat doc "\n" doc-ext "\n")))
382 (prin1 doc)
383 (terpri))
4ed46869
KH
384
385 ;; Arg KEY-BINDINGS
1375754c 386 (princ " '(")
4ed46869 387 (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
1375754c 388 (princ "\n ")
4ed46869 389 (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
1375754c 390 (princ "\n ")
4ed46869 391 (tit-generate-key-bindings tit-moveright 'quail-next-translation)
1375754c 392 (princ "\n ")
4ed46869 393 (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
1375754c 394 (princ ")\n")
4ed46869
KH
395
396 ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
397 ;; The remaining args are all nil.
1375754c
KH
398 (princ " nil")
399 (princ (if tit-multichoice " nil" " t"))
  ;; NOTE(review): `tit-keyprompt' was emptied by the GUIDANCE loop
  ;; above, so this test appears to always take the " nil nil" branch.
  ;; Confirm whether " t t" was ever meant to be emitted before
  ;; changing it, since existing generated packages rely on the
  ;; current output.
400 (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
401
;; Print one translation rule for KEY to `standard-output' as a list
;; (KEY TRANSLATIONS) followed by a newline.
;;
;; Every backslash followed by three digits in KEY is first replaced
;; by whatever the Lisp reader yields for that escape inside a string;
;; the text between and after the escapes is kept unchanged.
;;
;; TRANSLATIONS is printed as is when `tit-dictionary' is non-nil.
;; Otherwise it is a list accumulated in reverse order, and it is
;; reversed (destructively, by `nreverse') and converted to a vector.
402(defsubst tit-flush-translations (key translations)
403 (if (string-match "\\\\[0-9][0-9][0-9]" key)
  ;; NEWKEY starts with the text before the first escape plus the
  ;; decoded escape; IDX tracks where the unprocessed rest begins.
404 (let ((newkey (concat (substring key 0 (match-beginning 0))
405 (car (read-from-string
406 (concat "\"" (match-string 0 key) "\"")))))
407 (idx (match-end 0)))
408 (while (string-match "\\\\[0-9][0-9][0-9]" key idx)
409 (setq newkey (concat
410 newkey
411 (substring key idx (match-beginning 0))
412 (car (read-from-string
413 (concat "\"" (match-string 0 key) "\"")))))
414 (setq idx (match-end 0)))
415 (setq key (concat newkey (substring key idx)))))
416 (prin1 (list key (if tit-dictionary translations
417 (vconcat (nreverse translations)))))
418 (princ "\n"))
4ed46869
KH
419
420;; Convert body part of TIT dictionary into `quail-define-rules'
421;; function call.
;; Scanning starts at point and runs to the end of the buffer, so the
;; caller must leave point at the first line of the body.  Output goes
;; to `standard-output': an opening "(quail-define-rules", one rule
;; per distinct key printed by `tit-flush-translations', and a closing
;; parenthesis.
;;
;; Consecutive lines with the same key are merged into one rule.  When
;; `tit-dictionary' is non-nil the translations are concatenated into
;; a single string; otherwise they are collected into a list.
;;
;; NOTE(review): the locals `template' and `second' are bound but
;; never referenced in this function.
422(defun tit-process-body ()
423 (message "Formatting translation rules...")
1375754c
KH
424 (let* ((template (list nil nil))
425 (second (cdr template))
426 (prev-key "")
427 ch key translations pos)
428 (princ "(quail-define-rules\n")
4ed46869 429 (while (null (eobp))
1375754c
KH
430 (setq ch (following-char))
  ;; Lines that start with "#" and empty lines are skipped.
431 (if (or (= ch ?#) (= ch ?\n))
432 (forward-line 1)
4ed46869 433 (setq pos (point))
1375754c 434 (skip-chars-forward "^ \t\n")
8b735b2b 435 (setq key (buffer-substring-no-properties pos (point)))
4ed46869 436 (skip-chars-forward " \t")
1375754c
KH
437 (setq ch (following-char))
438 (if (or (= ch ?#) (= ch ?\n))
08a1bf22 439 ;; This entry contains no translations. Let's ignore it.
1375754c
KH
440 (forward-line 1)
  ;; A new key ends the previous group: emit what was collected
  ;; for PREV-KEY and start over.
441 (or (string= key prev-key)
08a1bf22 442 (progn
1375754c
KH
443 (if translations
444 (tit-flush-translations prev-key translations))
445 (setq translations nil
446 prev-key key)))
447 (if tit-dictionary
  ;; Dictionary mode: take the single field up to whitespace or
  ;; "#" and append it to the translation string.
448 (progn
449 (setq pos (point))
450 (skip-chars-forward "^ \t#\n")
451 (setq translations
452 (if translations
453 (concat translations
8b735b2b
KH
454 (buffer-substring-no-properties pos (point)))
455 (buffer-substring-no-properties pos (point)))))
1375754c
KH
  ;; Phrase mode: push every whitespace-separated field on the
  ;; line, stopping at a "#" comment.
456 (while (not (eolp))
457 (setq pos (point))
458 (skip-chars-forward "^ \t\n")
8b735b2b
KH
459 (setq translations (cons (buffer-substring-no-properties
460 pos (point))
1375754c
KH
461 translations))
462 (skip-chars-forward " \t")
463 (setq ch (following-char))
464 (if (= ch ?#) (end-of-line))))
08a1bf22 465 (forward-line 1))))
1375754c
KH
466
  ;; Emit the group that was still pending when the buffer ended.
467 (if translations
468 (tit-flush-translations prev-key translations))
469 (princ ")\n")))
4ed46869
KH
470
471;;;###autoload
472(defun titdic-convert (filename &optional dirname)
473 "Convert a TIT dictionary of FILENAME into a Quail package.
474Optional argument DIRNAME if specified is the directory name under which
475the generated Quail package is saved."
476 (interactive "FTIT dictionary file: ")
  ;; Two buffers are involved.  The outer one, created by
  ;; `with-temp-file', receives the generated package because
  ;; `standard-output' is bound to it.  The inner temporary buffer
  ;; holds the dictionary being read.
  ;; `coding-system-for-write' is bound here and later set to the
  ;; dictionary's coding system; `tit-process-header' prints that
  ;; value in the coding cookie, and it presumably also governs how
  ;; `with-temp-file' encodes the output file.
98223b73 477 (let ((coding-system-for-write nil))
c7211fed 478 (with-temp-file (tit-make-quail-package-file-name filename dirname)
c7211fed
DL
479 (let ((standard-output (current-buffer)))
480 (with-temp-buffer
481 (set-buffer-multibyte nil)
f9362982
KH
482 ;; Here we must use `raw-text' instead of `no-conversion' to
483 ;; enable auto-decoding of eol format (CRLF->LF).
484 (let ((coding-system-for-read 'raw-text))
c7211fed 485 (insert-file-contents (expand-file-name filename)))
a1506d29 486
c7211fed
DL
487 ;; Decode the buffer contents from the encoding specified by a
488 ;; value of the key "ENCODE:".
489 (if (not (search-forward "\nBEGIN" nil t))
490 (error "TIT dictionary doesn't have body part"))
  ;; LIMIT is the position just after the first "BEGIN" line start,
  ;; so the "ENCODE:" search below is confined to the header.
491 (let ((limit (point))
492 coding-system slot)
493 (goto-char (point-min))
494 (if (re-search-forward "^ENCODE:[ \t]*" limit t)
495 (progn
496 (goto-char (match-end 0))
497 (setq tit-encode (tit-read-key-value)))
498 (setq tit-encode tit-default-encode))
499 (setq slot (assoc tit-encode tit-encode-list))
500 (if (not slot)
501 (error "Invalid ENCODE: value in TIT dictionary"))
502 (setq coding-system (nth 1 slot))
503 (message "Decoding with coding system %s..." coding-system)
504 (goto-char (point-min))
98223b73 505 (decode-coding-region (point-min) (point-max) coding-system)
f9362982
KH
506 ;; Explicitly set eol format to `unix'.
507 (setq coding-system-for-write
508 (coding-system-change-eol-conversion coding-system 'unix))
98223b73 509 (remove-text-properties (point-min) (point-max) '(charset nil)))
c7211fed 510
66b820ba 511 (set-buffer-multibyte t)
c7211fed 512 ;; Set point the starting position of the body part.
1375754c 513 (goto-char (point-min))
c7211fed
DL
514 (if (not (search-forward "\nBEGIN" nil t))
515 (error "TIT dictionary can't be decoded correctly"))
516
2db18278 517 ;; Process the header part.
  ;; The header runs through the BEGIN... line.  The buffer is
  ;; narrowed to it so that `tit-process-header' stops there; that
  ;; function leaves point at the end of the narrowed region, which
  ;; after `widen' is the first line of the body.
c7211fed
DL
518 (forward-line 1)
519 (narrow-to-region (point-min) (point))
520 (tit-process-header filename)
521 (widen)
522
8f924df7 523 ;; Process the body part
c7211fed 524 (tit-process-body))))))
4ed46869
KH
525
526;;;###autoload
44cbfae9 527(defun batch-titdic-convert (&optional force)
4ed46869
KH
528 "Run `titdic-convert' on the files remaining on the command line.
529Use this from the command line, with `-batch';
530it won't work in an interactive Emacs.
531For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
532 generate Quail package file \"xxx.el\" from TIT dictionary file \"XXX.tit\".
533To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\"."
  ;; With FORCE non-nil every file is converted.  Otherwise a file is
  ;; converted only when `file-newer-than-file-p' reports it newer
  ;; than the Quail package file it would produce.
  ;;
  ;; Arguments are taken from `command-line-args-left': a first
  ;; argument "-h" prints usage and converts nothing; a leading
  ;; "-dir DIR" selects the output directory; each remaining argument
  ;; is either a file or a directory, and for a directory every entry
  ;; matching "\\.tit$" is converted.  `kill-emacs' is called at the
  ;; end in every case, including after "-h".
  ;;
  ;; NOTE(review): the docstring and the usage messages speak of
  ;; lower-case "xxx.el" output, but
  ;; `tit-make-quail-package-file-name' keeps the case of the input
  ;; name -- confirm which is intended.
534 (defvar command-line-args-left) ; Avoid compiler warning.
535 (if (not noninteractive)
536 (error "`batch-titdic-convert' should be used only with -batch"))
537 (if (string= (car command-line-args-left) "-h")
538 (progn
539 (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
540 (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
541 (message "To convert XXX.tit into DIR/xxx.el:")
542 (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
543 (let (targetdir filename files file)
544 (if (string= (car command-line-args-left) "-dir")
545 (progn
546 (setq command-line-args-left (cdr command-line-args-left))
547 (setq targetdir (car command-line-args-left))
548 (setq command-line-args-left (cdr command-line-args-left))))
549 (while command-line-args-left
550 (setq filename (expand-file-name (car command-line-args-left)))
551 (if (file-directory-p filename)
552 (progn
553 (message "Converting all tit files in the directory %s" filename)
554 (setq files (directory-files filename t "\\.tit$")))
555 (setq files (list filename)))
556 (while files
557 (setq file (expand-file-name (car files)))
44cbfae9
KH
558 (when (or force
559 (file-newer-than-file-p
560 file (tit-make-quail-package-file-name file targetdir)))
561 (message "Converting %s to quail-package..." file)
562 (titdic-convert file targetdir))
4ed46869
KH
563 (setq files (cdr files)))
564 (setq command-line-args-left (cdr command-line-args-left)))
86e4f7c0 565 (message "Byte-compile the created files by:")
4ed46869
KH
566 (message " %% emacs -batch -f batch-byte-compile XXX.el")))
567 (kill-emacs 0))
568
c063e381
KH
569\f
570;;; Converter of miscellaneous dictionaries other than TIT format.
571
572;; Alist of input method names and the corresponding information.
573;; Each element has this form:
574;; (INPUT-METHOD-NAME ;; Name of the input method.
a1506d29 575;; INPUT-METHOD-TITLE ;; Title string of the input method
c063e381
KH
576;; DICFILE ;; Name of the source dictionary file.
577;; CODING ;; Coding system of the dictionary file.
578;; QUAILFILE ;; Name of the Quail package file.
579;; CONVERTER ;; Function to generate the Quail package.
580;; COPYRIGHT-NOTICE ;; Copyright notice of the source dictionary.
581;; )
582
;; The entries below describe eight input methods: Tsangchi and Quick
;; (each in a Big5 and a CNS variant, all using a "cangjie-table.*"
;; dictionary file), Pinyin ("pinyin.map"), Ziranma ("ziranma.cin"),
;; and CTLau in a GB and a Big5 variant.  In each entry the last
;; element is the copyright notice, written as a string of ";;"
;; comment lines.
;; NOTE(review): the code that consumes this alist is outside the part
;; of the file visible here.
583(defvar quail-misc-package-ext-info
584 '(("chinese-b5-tsangchi" "\e$(06A\e(BB"
a1506d29 585 "cangjie-table.b5" big5 "tsang-b5.el"
c063e381
KH
586 tsang-b5-converter
587 "\
588;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
589;; #
590;; # Permission to copy and distribute both modified and
591;; # unmodified versions is granted without royalty provided
592;; # this notice is preserved.")
593
594 ("chinese-b5-quick" "\e$(0X|\e(BB"
a1506d29 595 "cangjie-table.b5" big5 "quick-b5.el"
c063e381
KH
596 quick-b5-converter
597 "\
598;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
599;; #
600;; # Permission to copy and distribute both modified and
601;; # unmodified versions is granted without royalty provided
602;; # this notice is preserved.")
603
604 ("chinese-cns-tsangchi" "\e$(GT?\e(BC"
605 "cangjie-table.cns" iso-2022-cn-ext "tsang-cns.el"
606 tsang-cns-converter
607 "\
608;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
609;; #
610;; # Permission to copy and distribute both modified and
611;; # unmodified versions is granted without royalty provided
612;; # this notice is preserved.")
613
614 ("chinese-cns-quick" "\e$(Gv|\e(BC"
615 "cangjie-table.cns" iso-2022-cn-ext "quick-cns.el"
616 quick-cns-converter
617 "\
618;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
619;; #
620;; # Permission to copy and distribute both modified and
621;; # unmodified versions is granted without royalty provided
622;; # this notice is preserved.")
623
624 ("chinese-py" "\e$AF4\e(BG"
625 "pinyin.map" cn-gb-2312 "PY.el"
626 py-converter
627 "\
628;; \"pinyin.map\" is included in a free package called CCE. It is
629;; available at:
630;; http://ftp.debian.org/debian/dists/potato/main
631;; /source/utils/cce_0.36.orig.tar.gz
632;; This package contains the following copyright notice.
633;;
634;;
635;; Copyright (C) 1999, Rui He, herui@cs.duke.edu
a1506d29
JB
636;;
637;;
c063e381 638;; CCE(Console Chinese Environment) 0.32
a1506d29
JB
639;;
640;; CCE is free software; you can redistribute it and/or modify it under the
641;; terms of the GNU General Public License as published by the Free Software
642;; Foundation; either version 1, or (at your option) any later version.
643;;
644;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
645;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
646;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
647;; details.
648;;
c063e381 649;; You should have received a copy of the GNU General Public License along with
a1506d29 650;; CCE; see the file COPYING. If not, write to the Free Software Foundation,
3ef97fb6 651;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
c063e381
KH
652
653 ("chinese-ziranma" "\e$AWTH;\e(B"
654 "ziranma.cin" cn-gb-2312 "ZIRANMA.el"
655 ziranma-converter
656 "\
657;; \"ziranma.cin\" is included in a free package called CCE. It is
658;; available at:
659;; http://ftp.debian.org/debian/dists/potato/main
660;; /source/utils/cce_0.36.orig.tar.gz
661;; This package contains the following copyright notice.
662;;
663;;
664;; Copyright (C) 1999, Rui He, herui@cs.duke.edu
a1506d29
JB
665;;
666;;
c063e381 667;; CCE(Console Chinese Environment) 0.32
a1506d29
JB
668;;
669;; CCE is free software; you can redistribute it and/or modify it under the
670;; terms of the GNU General Public License as published by the Free Software
671;; Foundation; either version 1, or (at your option) any later version.
672;;
673;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
674;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
675;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
676;; details.
677;;
c063e381 678;; You should have received a copy of the GNU General Public License along with
a1506d29 679;; CCE; see the file COPYING. If not, write to the Free Software Foundation,
3ef97fb6 680;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
a4c4011b 681
a4c4011b
AC
682 ("chinese-ctlau" "\e$AAuTA\e(B"
683 "CTLau.html" cn-gb-2312 "CTLau.el"
684 ctlau-gb-converter
685 "\
686;; \"CTLau.html\" is available at:
687;;
688;; http://umunhum.stanford.edu/~lee/chicomp/CTLau.html
689;;
690;; It contains the following copyright notice:
691;;
692;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu)
a1506d29 693;; #
a4c4011b
AC
694;; # This program is free software; you can redistribute it and/or
695;; # modify it under the terms of the GNU General Public License
696;; # as published by the Free Software Foundation; either version 2
697;; # of the License, or any later version.
a1506d29 698;; #
a4c4011b
AC
699;; # This program is distributed in the hope that it will be useful,
700;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
701;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
702;; # GNU General Public License for more details.
a1506d29 703;; #
a4c4011b
AC
704;; # You should have received a copy of the GNU General Public License
705;; # along with this program; if not, write to the Free Software Foundation,
086add15 706;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
a4c4011b 707
e089be2d 708 ("chinese-ctlaub" "\e$(0N,Gn\e(B"
2bef0948 709 "CTLau-b5.html" big5 "CTLau-b5.el"
a4c4011b
AC
710 ctlau-b5-converter
711 "\
712;; \"CTLau-b5.html\" is available at:
713;;
714;; http://umunhum.stanford.edu/~lee/chicomp/CTLau-b5.html
715;;
716;; It contains the following copyright notice:
717;;
718;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu)
a1506d29 719;; #
a4c4011b
AC
720;; # This program is free software; you can redistribute it and/or
721;; # modify it under the terms of the GNU General Public License
722;; # as published by the Free Software Foundation; either version 2
723;; # of the License, or any later version.
a1506d29 724;; #
a4c4011b
AC
725;; # This program is distributed in the hope that it will be useful,
726;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
727;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
728;; # GNU General Public License for more details.
a1506d29 729;; #
a4c4011b
AC
730;; # You should have received a copy of the GNU General Public License
731;; # along with this program; if not, write to the Free Software Foundation,
086add15 732;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
c063e381
KH
733 ))
734
735;; Generate a code of a Quail package in the current buffer from Tsang
736;; dictionary in the buffer DICBUF. The input method name of the
737;; Quail package is NAME, and the title string is TITLE.
738
739;; If TSANG-P is non-nil, generate \e$(06AQo\e(B input method. Otherwise
740;; generate \e$(0X|/y\e(B (simple version of \e$(06AQo\e(B). If BIG5-P is non-nil, the
741;; input method is for inputting Big5 characters. Otherwise the input
742;; method is for inputting CNS characters.
743
744(defun tsang-quick-converter (dicbuf name title tsang-p big5-p)
 ;; Append to the current buffer: (1) a docstring holding a key-layout
 ;; table (Big5 or CNS variant, chosen by BIG5-P) with FULLTITLE
 ;; substituted twice; (2) bindings of `.' and `,' to
 ;; quail-next-translation-block and quail-prev-translation-block;
 ;; (3) a `quail-define-rules' form.
 ;;
 ;; The rules are read from DICBUF, starting at the line containing
 ;; the first occurrence of A440.  On each line the translation is the
 ;; character found 5 characters into the line, and the key is the run
 ;; of [A-Z] at the end of the line, downcased.  When TSANG-P is nil
 ;; and that run is longer than one character, only its first and last
 ;; characters form the key.  Translations sharing a key are
 ;; concatenated, and the rules are emitted sorted by key.  Finally a
 ;; fixed punctuation table is emitted under keys prefixed with `z',
 ;; using the second or third element of each entry according to BIG5-P.
 ;;
 ;; NAME and TITLE are not referenced in this body.
 ;; NOTE(review): the key search is unbounded and passes NOERROR, so a
 ;; line lacking a trailing [A-Z] run would not signal an error here;
 ;; confirm that the dictionary format guarantees one per line.
 ;;
 ;; FULLTITLE is chosen from four fixed strings by TSANG-P and BIG5-P;
 ;; DIC collects the (KEY . TRANSLATIONS) pairs.
745 (let ((fulltitle (if tsang-p (if big5-p "\e$(06AQo\e(B" "\e$(GT?on\e(B")
746 (if big5-p "\e$(0X|/y\e(B" "\e$(Gv|Mx\e(B")))
747 dic)
 ;; All output is appended at the end of the current buffer.
748 (goto-char (point-max))
749 (if big5-p
750 (insert (format "\"\e$(0&d'GTT&,!J\e(B%s\e$(0!K\e(BBIG5
751
752 \e$(0KHM$\e(B%s\e$(0TT&,WoOu\e(B
753
754 [Q \e$(0'D\e(B] [W \e$(0(q\e(B] [E \e$(0'V\e(B] [R \e$(0&H\e(B] [T \e$(0'>\e(B] [Y \e$(0&4\e(B] [U \e$(0&U\e(B] [I \e$(0'B\e(B] [O \e$(0&*\e(B] [P \e$(0'A\e(B]
755
756 [A \e$(0'K\e(B] [S \e$(0&T\e(B] [D \e$(0'N\e(B] [F \e$(0'W\e(B] [G \e$(0&I\e(B] [H \e$(0*M\e(B] [J \e$(0&3\e(B] [L \e$(0&d\e(B]
a1506d29 757
c063e381
KH
758 [Z ] [X \e$(0[E\e(B] [C \e$(01[\e(B] [V \e$(0&M\e(B] [B \e$(0'M\e(B] [N \e$(0&_\e(B] [M \e$(0&"\e(B]
759
760\\\\<quail-translation-docstring>\"\n"
761 fulltitle fulltitle))
762 (insert (format "\"\e$(GDcEFrSD+!J\e(B%s\e$(G!K\e(BCNS
763
764 \e$(GiGk#\e(B%s\e$(GrSD+uomu\e(B
765
766 [Q \e$(GEC\e(B] [W \e$(GFp\e(B] [E \e$(GEU\e(B] [R \e$(GDG\e(B] [T \e$(GE=\e(B] [Y \e$(GD3\e(B] [U \e$(GDT\e(B] [I \e$(GEA\e(B] [O \e$(GD)\e(B] [P \e$(GE@\e(B]
767
768 [A \e$(GEJ\e(B] [S \e$(GDS\e(B] [D \e$(GEM\e(B] [F \e$(GEV\e(B] [G \e$(GDH\e(B] [H \e$(GHL\e(B] [J \e$(GD2\e(B] [L \e$(GDc\e(B]
a1506d29
JB
769
770 [Z ] [X \e$(GyE\e(B] [C \e$(GOZ\e(B] [V \e$(GDL\e(B] [B \e$(GEL\e(B] [N \e$(GD^\e(B] [M \e$(GD!\e(B]
c063e381
KH
771
772\\\\<quail-translation-docstring>\"\n"
773 fulltitle fulltitle)))
774 (insert " '((\".\" . quail-next-translation-block)
775 (\",\" . quail-prev-translation-block))
776 nil nil)\n\n")
777 (insert "(quail-define-rules\n")
9a529312 778 (with-current-buffer dicbuf
6b61353c
KH
779 ;; Handle double CR line ends, which result when checking out of
780 ;; CVS on MS-Windows.
781 (goto-char (point-min))
c063e381
KH
782 (search-forward "A440")
783 (beginning-of-line)
784 (let ((table (make-hash-table :test 'equal))
785 val)
786 (while (not (eobp))
787 (forward-char 5)
788 (let ((trans (char-to-string (following-char)))
789 key slot)
8b735b2b 790 (re-search-forward "\\([A-Z]+\\)\r*$" nil t)
c063e381
KH
791 (setq key (downcase
792 (if (or tsang-p
8b735b2b
KH
793 (<= (- (match-end 1) (match-beginning 1)) 1))
794 (match-string 1)
795 (string (char-after (match-beginning 1))
796 (char-after (1- (match-end 1)))))))
c063e381
KH
797 (setq val (gethash key table))
798 (if val (setq trans (concat val trans)))
799 (puthash key trans table)
800 (forward-line 1)))
801 (maphash #'(lambda (key val) (setq dic (cons (cons key val) dic)))
802 table)))
803 (setq dic (sort dic (function (lambda (x y) (string< (car x ) (car y))))))
804 (dolist (elt dic)
805 (insert (format "(%S\t%S)\n" (car elt) (cdr elt))))
806 (let ((punctuations '((";" "\e$(0!'!2!"!#!.!/\e(B" "\e$(G!'!2!"!#!.!/\e(B")
807 (":" "\e$(0!(!+!3!%!$!&!0!1\e(B" "\e$(G!(!+!3!%!$!&!0!1\e(B")
808 ("'" "\e$(0!e!d\e(B" "\e$(G!e!d\e(B")
809 ("\"" "\e$(0!g!f!h!i!q\e(B" "\e$(G!g!f!h!i!q\e(B")
810 ("\\" "\e$(0"`"b#M\e(B" "\e$(G"`"b#M\e(B")
811 ("|" "\e$(0!6!8!:"^\e(B" "\e$(G!6!8!:"^\e(B")
812 ("/" "\e$(0"_"a#L\e(B" "\e$(G"_"a#L\e(B")
813 ("?" "\e$(0!)!4\e(B" "\e$(G!)!4\e(B")
814 ("<" "\e$(0!R"6"A!T"H\e(B" "\e$(G!R"6"A!T"H\e(B")
815 (">" "\e$(0!S"7"B!U\e(B" "\e$(G!S"7"B!U\e(B")
816 ("[" "\e$(0!F!J!b!H!L!V!Z!X!\\e(B" "\e$(G!F!J!b!H!L!V!Z!X!\\e(B")
817 ("]" "\e$(0!G!K!c!I!M!W![!Y!]\e(B" "\e$(G!G!K!c!I!M!W![!Y!]\e(B")
818 ("{" "\e$(0!B!`!D\e(B " "\e$(G!B!`!D\e(B ")
819 ("}" "\e$(0!C!a!E\e(B" "\e$(G!C!a!E\e(B")
820 ("`" "\e$(0!j!k\e(B" "\e$(G!j!k\e(B")
821 ("~" "\e$(0"D"+",!<!=\e(B" "\e$(G"D"+",!<!=\e(B")
822 ("!" "\e$(0!*!5\e(B" "\e$(G!*!5\e(B")
823 ("@" "\e$(0"i"n\e(B" "\e$(G"i"n\e(B")
824 ("#" "\e$(0!l"-\e(B" "\e$(G!l"-\e(B")
825 ("$" "\e$(0"c"l\e(B" "\e$(G"c"l\e(B")
826 ("%" "\e$(0"h"m\e(B" "\e$(G"h"m\e(B")
827 ("&" "\e$(0!m".\e(B" "\e$(G!m".\e(B")
828 ("*" "\e$(0!n"/!o!w!x\e(B" "\e$(G!n"/!o!w!x\e(B")
829 ("(" "\e$(0!>!^!@\e(B" "\e$(G!>!^!@\e(B")
830 (")" "\e$(0!?!_!A\e(B" "\e$(G!?!_!A\e(B")
831 ("-" "\e$(0!7!9"#"$"1"@\e(B" "\e$(G!7!9"#"$"1"@\e(B")
832 ("_" "\e$(0"%"&\e(B" "\e$(G"%"&\e(B")
833 ("=" "\e$(0"8"C\e(B" "\e$(G"8"C\e(B")
834 ("+" "\e$(0"0"?\e(B" "\e$(G"0"?\e(B"))))
835 (dolist (elt punctuations)
836 (insert (format "(%S %S)\n" (concat "z" (car elt))
837 (if big5-p (nth 1 elt) (nth 2 elt))))))
838 (insert ")\n")))
839
(defun tsang-b5-converter (dicbuf name title)
  "Convert the dictionary in DICBUF via `tsang-quick-converter'.
DICBUF, NAME and TITLE are passed through unchanged.  TSANG-P and
BIG5-P are both t: the non-simplified method, for Big5 characters."
  (let ((tsang-p t)
        (big5-p t))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
842
(defun quick-b5-converter (dicbuf name title)
  "Convert the dictionary in DICBUF via `tsang-quick-converter'.
DICBUF, NAME and TITLE are passed through unchanged.  TSANG-P is nil
and BIG5-P is t: the simplified method, for Big5 characters."
  (let ((tsang-p nil)
        (big5-p t))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
845
(defun tsang-cns-converter (dicbuf name title)
  "Convert the dictionary in DICBUF via `tsang-quick-converter'.
DICBUF, NAME and TITLE are passed through unchanged.  TSANG-P is t
and BIG5-P is nil: the non-simplified method, for CNS characters."
  (let ((tsang-p t)
        (big5-p nil))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
848
(defun quick-cns-converter (dicbuf name title)
  "Convert the dictionary in DICBUF via `tsang-quick-converter'.
DICBUF, NAME and TITLE are passed through unchanged.  TSANG-P and
BIG5-P are both nil: the simplified method, for CNS characters."
  (let ((tsang-p nil)
        (big5-p nil))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
851
852;; Generate a code of a Quail package in the current buffer from
853;; Pinyin dictionary in the buffer DICBUF. The input method name of
854;; the Quail package is NAME, and the title string is TITLE.
855
856(defun py-converter (dicbuf name title)
 ;; Append to the current buffer a docstring (printed with %S), a
 ;; list of translation-key bindings, and a `quail-define-rules' form
 ;; built from the text of DICBUF.
 ;;
 ;; The whole of DICBUF is copied in, and everything before the first
 ;; line that starts with a lowercase letter is then deleted.  Each
 ;; remaining line is rewritten in place: the leading run of a-z
 ;; becomes the quoted key, the single character after it is dropped,
 ;; trailing CRs are removed, and the rest of the line becomes the
 ;; quoted translation string.
 ;;
 ;; NAME and TITLE are not referenced in this body.
857 (goto-char (point-max))
858 (insert (format "%S\n" "\e$A::WVJdHk!KF4Rt!K\e(B
859
860 \e$AF4Rt7=08\e(B
861
862 \e$AP!P4S"NDWVD84z1m!8F4Rt!97{:E#,\e(B \"u(yu) \e$ATrSC\e(B u: \e$A1mJ>!C\e(B
863
864Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
865
c3ff164a 866Pinyin is the standard roman transliteration method for Chinese.
c063e381
KH
867Pinyin uses a sequence of Latin alphabetic characters for each Chinese
868character. The sequence is made by the combination of the initials
869\(the beginning sounds) and finals (the ending sounds).
870
871 initials: b p m f d t n l z c s zh ch sh r j q x g k h
872 finals: a o e i er ai ei oa ou an en ang eng ong i ia iao ie iu ian in
873 iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun
874
875 (Note: In the correct Pinyin writing, the sequence \"yu\" in the last
876 four finals should be written by the character u-umlaut `\e$A(9\e(B'.)
877
878With this input method, you enter a Chinese character by first
879entering its pinyin spelling.
880
881\\<quail-translation-docstring>
882
883For instance, to input \e$ADc\e(B, you type \"n i C-n 3\". The first \"n i\"
884is a Pinyin, \"C-n\" selects the next group of candidates (each group
885contains at most 10 characters), \"3\" select the third character in
886that group.
887
888This input method supports only Han characters. The related input
889method `chinese-py-punct' is the combination of this method and
890`chinese-punct'; it supports both Han characters and punctuation
891characters.
892
893For double-width GB2312 characters corresponding to ASCII, use the
894input method `chinese-qj'.
895
896The correct Pinyin system specifies tones by diacritical marks, but
897this input method doesn't use them, which results in easy (you don't
898have to know the exact tones), but verbose (many characters are assigned
899to the same key sequence) input. You may also want to try the input
900method `chinese-tonepy' with which you must specify tones by digits
901\(1..5)."))
902 (insert " '((\"\C-?\" . quail-delete-last-char)
903 (\".\" . quail-next-translation)
904 (\">\" . quail-next-translation)
905 (\",\" . quail-prev-translation)
906 (\"<\" . quail-prev-translation))
907 nil nil nil nil)\n\n")
908 (insert "(quail-define-rules\n")
909 (let ((pos (point)))
8b735b2b 910 (insert-buffer-substring-no-properties dicbuf)
c063e381 911 (goto-char pos)
8c8d1081 912 (re-search-forward "^[a-z]")
7d15a839
KH
913 (beginning-of-line)
914 (delete-region pos (point))
c063e381
KH
915 (while (not (eobp))
916 (insert "(\"")
917 (skip-chars-forward "a-z")
918 (insert "\" \"")
919 (delete-char 1)
920 (end-of-line)
8b735b2b
KH
921 (while (= (preceding-char) ?\r)
922 (delete-char -1))
c063e381
KH
923 (insert "\")")
924 (forward-line 1)))
925 (insert ")\n"))
926
927;; Generate a code of a Quail package in the current buffer from
928;; Ziranma dictionary in the buffer DICBUF. The input method name of
929;; the Quail package is NAME, and the title string is TITLE.
930
931(defun ziranma-converter (dicbuf name title)
 ;; Build an alist DIC of (KEY . TRANSLATION) from DICBUF, then append
 ;; to the current buffer a docstring (printed with %S), a list of
 ;; translation-key bindings, and a `quail-define-rules' form listing
 ;; DIC sorted by key.
 ;; NAME and TITLE are not referenced in this body.
932 (let (dic)
9a529312 933 (with-current-buffer dicbuf
c063e381 934 (goto-char (point-min))
8b735b2b
KH
 ;; Entries begin on the line after the `%keyname end' marker line.
935 (search-forward "\n%keyname end")
936 (forward-line 1)
c063e381
KH
 ;; TABLE maps each key to a vector of its translation strings, in
 ;; the order they appear.  ELT is bound here but not referenced
 ;; inside this `let'.
937 (let ((table (make-hash-table :test 'equal))
938 elt pos key trans val)
939 (while (not (eobp))
 ;; KEY is the text up to the first space or tab on the line.
940 (setq pos (point))
941 (skip-chars-forward "^ \t")
8b735b2b 942 (setq key (buffer-substring-no-properties pos (point)))
c063e381 943 (skip-chars-forward " \t")
8b735b2b
KH
 ;; The translation is the rest of the line, excluding any CR/LF.
944 (setq pos (point))
945 (skip-chars-forward "^\r\n")
946 (setq trans (vector (buffer-substring-no-properties pos (point))))
c063e381
KH
 ;; Append to the translations already recorded for KEY.
947 (setq val (gethash key table))
948 (if val (setq trans (vconcat val trans)))
949 (puthash key trans table)
950 (forward-line 1))
 ;; Flatten each entry: when every translation of a key is a
 ;; one-character string, they are joined into a single string;
 ;; otherwise the vector of strings is kept as is.
951 (maphash #'(lambda (key trans)
952 (let ((len (length trans))
953 i)
954 (if (and (= len 1) (= (length (aref trans 0)) 1))
955 (setq trans (aref trans 0))
956 (setq i 0)
957 (while (and (< i len)
958 (= (length (aref trans i)) 1))
959 (setq i (1+ i)))
960 (if (= i len)
961 (setq trans (mapconcat 'identity trans "")))))
962 (setq dic (cons (cons key trans) dic)))
963 table)))
 ;; Emit the rules in ascending key order.
964 (setq dic (sort dic (function (lambda (x y) (string< (car x) (car y))))))
965 (goto-char (point-max))
966 (insert (format "%S\n" "\e$A::WVJdHk!K!>WTH;!?!K\e(B
967
968 \e$A<|EL6TUU1m\e(B:
969 \e$A)3)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)7\e(B
970 \e$A)'#Q\e(B \e$A)'#W\e(B \e$A)'#E\e(B \e$A)'#R\e(B \e$A)'#T\e(B \e$A)'#Y\e(B \e$A)'#U\e(Bsh\e$A)'#I\e(Bch\e$A)'#O\e(B \e$A)'#P\e(B \e$A)'\e(B
971 \e$A)'\e(B iu\e$A)'\e(B ua\e$A)'\e(B e\e$A)'\e(B uan\e$A)'\e(B ue\e$A)'\e(B uai\e$A)'\e(B u\e$A)'\e(B i\e$A)'\e(B o\e$A)'\e(B un\e$A)'\e(B
972 \e$A)'\e(B \e$A)'\e(B ia\e$A)'\e(B \e$A)'\e(B van\e$A)'\e(B ve\e$A)'\e(B ing\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B uo\e$A)'\e(B vn\e$A)'\e(B
973 \e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)?\e(B
974 \e$A)'#A\e(B \e$A)'#S\e(B \e$A)'#D\e(B \e$A)'#F\e(B \e$A)'#G\e(B \e$A)'#H\e(B \e$A)'#J\e(B \e$A)'#K\e(B \e$A)'#L\e(B \e$A)'\e(B
975 \e$A)'\e(B a\e$A)'\e(Biong\e$A)'\e(Buang\e$A)'\e(B en\e$A)'\e(B eng\e$A)'\e(B ang\e$A)'\e(B an\e$A)'\e(B ao\e$A)'\e(B ai\e$A)'\e(B
976 \e$A)'\e(B \e$A)'\e(B ong\e$A)'\e(Biang\e$A)'\e(B \e$A)'\e(B ng\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
977 \e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)%)7\e(B
978 \e$A)'#Z\e(B \e$A)'#X\e(B \e$A)'#C\e(B \e$A)'#V\e(Bzh\e$A)'#B\e(B \e$A)'#N\e(B \e$A)'#M\e(B \e$A)'#,\e(B \e$A)'#.\e(B \e$A)'\e(B \e$A#/\e(B \e$A)'\e(B
979 \e$A)'\e(B ei\e$A)'\e(B ie\e$A)'\e(B iao\e$A)'\e(B ui\e$A)'\e(B ou\e$A)'\e(B in\e$A)'\e(B ian\e$A)'G0R3)':sR3)'7{:E)'\e(B
980 \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B v\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
981 \e$A);)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)?\e(B
982
983
984Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').
985
986Pinyin is the standard roman transliteration method for Chinese.
987For the details of Pinyin system, see the documentation of the input
988method `chinese-py'.
989
990Unlike the standard spelling of Pinyin, in this input method all
991initials and finals are assigned to single keys (see the above table).
992For instance, the initial \"ch\" is assigned to the key `i', the final
993\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and \e$AGaIy\e(B are
994assigned to the keys `q', `w', `e', `r', `t' respectively.
995
996\\<quail-translation-docstring>
997
998To input one-letter words, you type 4 keys, the first two for the
999Pinyin of the letter, next one for tone, and the last one is always a
1000quote ('). For instance, \"vsq'\" input \e$AVP\e(B. Exceptions are these
1001letters. You can input them just by typing a single key.
1002
1003 Character: \e$A04\e(B \e$A2;\e(B \e$A4N\e(B \e$A5D\e(B \e$A6~\e(B \e$A7"\e(B \e$A8v\e(B \e$A:M\e(B \e$A3v\e(B \e$A<0\e(B \e$A?I\e(B \e$AAK\e(B \e$AC;\e(B
1004 Key: a b c d e f g h i j k l m
1005 Character: \e$ADc\e(B \e$AE7\e(B \e$AF,\e(B \e$AF_\e(B \e$AHK\e(B \e$AH}\e(B \e$AK{\e(B \e$AJG\e(B \e$AWE\e(B \e$ANR\e(B \e$AP!\e(B \e$AR;\e(B \e$ATZ\e(B
1006 Key: n o p q r s t u v w x y z
1007
1008To input two-letter words, you have two ways. One way is to type 4
1009keys, two for the first Pinyin, two for the second Pinyin. For
1010instance, \"vsgo\" inputs \e$AVP9z\e(B. Another way is to type 3 keys: 2
1011initials of two letters, and quote ('). For instance, \"vg'\" also
1012inputs \e$AVP9z\e(B.
1013
1014To input three-letter words, you type 4 keys: initials of three
1015letters, and the last is quote ('). For instance, \"bjy'2\" inputs \e$A11\e(B
1016\e$A>)Q<\e(B (the last `2' is to select one of the candidates).
1017
1018To input words of more than three letters, you type 4 keys, initials
1019of the first three letters and the last letter. For instance,
1020\"bjdt\" inputs \e$A11>)5gJSL(\e(B.
1021
1022To input symbols and punctuations, type `/' followed by one of `a' to
1023`z', then select one of the candidates."))
1024 (insert " '((\"\C-?\" . quail-delete-last-char)
1025 (\".\" . quail-next-translation)
1026 (\"[\" . quail-next-translation)
1027 (\",\" . quail-prev-translation)
1028 (\"]\" . quail-prev-translation))
1029 nil nil nil nil)\n\n")
1030 (insert "(quail-define-rules\n")
1031 (dolist (elt dic)
1032 (insert (format "(%S %S)\n" (car elt) (cdr elt))))
1033 (insert ")\n")))
1034
a4c4011b
AC
1035;; Generate the code for a Quail package in the current buffer from a
1036;; CTLau or CTLau-b5 dictionary in the buffer DICBUF. The input
1037;; method name of the Quail package is NAME, and the title string is
1038;; TITLE. DESCRIPTION is the string shown by describe-input-method.
1039
(defun ctlau-converter (dicbuf name title description)
  "Append Quail package code for a CTLau dictionary to the current buffer.
DICBUF is the buffer holding the CTLau or CTLau-b5 HTML dictionary.
DESCRIPTION is inserted, printed with `%S', ahead of the key bindings
and the `quail-define-rules' form.
NAME and TITLE are not used in the body."
  (goto-char (point-max))
  (insert (format "%S\n" description))
  (insert " '((\"\C-?\" . quail-delete-last-char)
 (\".\" . quail-next-translation)
 (\">\" . quail-next-translation)
 (\",\" . quail-prev-translation)
 (\"<\" . quail-prev-translation))
 nil nil nil nil)\n\n")
  (insert "(quail-define-rules\n")
  (let ((pos (point))
        dicbuf-start dicbuf-end key-start)
    ;; Locate the dictionary proper: it starts on the line after the
    ;; first line beginning with `#<hr>' and ends just before the last
    ;; line beginning with `<hr>'.
    (with-current-buffer dicbuf
      (goto-char (point-min))
      (re-search-forward "^#<hr>")
      (forward-line 1)
      (setq dicbuf-start (point))
      (goto-char (point-max))
      (re-search-backward "^<hr>")
      (setq dicbuf-end (point)))
    (insert-buffer-substring-no-properties dicbuf dicbuf-start dicbuf-end)
    ;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as
    ;; hollow boxes when the original characters in CTLau.html from
    ;; which the file is converted have no Big5 equivalent.  Go
    ;; through and delete them.
    (goto-char pos)
    (while (search-forward "\e$(0!{\e(B" nil t)
      (delete-char -1))
    ;; Rewrite every copied line in place as a ("key" "translation")
    ;; rule.  Uppercase keys are downcased, and a backslash at the
    ;; beginning of a key is doubled so that it survives being read
    ;; back as a Lisp string.
    (goto-char pos)
    (while (not (eobp))
      (insert "(\"")
      (when (char-equal (following-char) ?\\)
        (insert "\\"))
      (setq key-start (point))
      (skip-chars-forward "\\\\A-Z")
      (downcase-region key-start (point))
      (insert "\" \"")
      ;; Drop the single separator character that follows the key.
      (delete-char 1)
      (end-of-line)
      ;; Strip CRs left over from CRLF line endings.
      (while (= (preceding-char) ?\r)
        (delete-char -1))
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))
1088
1089(defun ctlau-gb-converter (dicbuf name title)
 ;; Call `ctlau-converter' with DICBUF, NAME and TITLE passed through
 ;; unchanged and the fixed description text below as DESCRIPTION.
1090 (ctlau-converter dicbuf name title
1091"\e$A::WVJdHk!KAuN}OiJ=TARt!K\e(B
1092
1093 \e$AAuN}OiJ=TASoW"Rt7=08\e(B
1094 Sidney Lau's Cantonese transcription scheme as described in his book
1095 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
1096 This file was prepared by Fung Fung Lee (\e$A@n7c7e\e(B).
1097 Originally converted from CTCPS3.tit
1098 Last modified: June 2, 1993.
1099
4e7e1f03 1100 Some infrequent GB characters are accessed by typing \\, followed by
a4c4011b
AC
1101 the Cantonese romanization of the respective radical (\e$A2?JW\e(B)."))
1102
1103(defun ctlau-b5-converter (dicbuf name title)
 ;; Call `ctlau-converter' with DICBUF, NAME and TITLE passed through
 ;; unchanged and the fixed description text below as DESCRIPTION.
1104 (ctlau-converter dicbuf name title
1105"\e$(0KH)tTT&,!(N,Tg>A*#Gn5x!(\e(B
1106
1107 \e$(0N,Tg>A*#GnM$0D5x'J7{\e(B
1108 Sidney Lau's Cantonese transcription scheme as described in his book
1109 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
1110 This file was prepared by Fung Fung Lee (\e$(0,XFS76\e(B).
1111 Originally converted from CTCPS3.tit
1112 Last modified: June 2, 1993.
1113
4e7e1f03 1114 Some infrequent characters are accessed by typing \\, followed by
a4c4011b
AC
1115 the Cantonese romanization of the respective radical (\e$(0?f5}\e(B)."))
1116
5cec3056 1117(declare-function dos-8+3-filename "dos-fns.el" (filename))
73e6adaa 1118
c063e381 1119(defun miscdic-convert (filename &optional dirname)
a1506d29 1120 "Convert a dictionary file FILENAME into a Quail package.
c063e381
KH
1121Optional argument DIRNAME if specified is the directory name under which
1122the generated Quail package is saved."
1123 (interactive "FInput method dictionary file: ")
1124 (or (file-readable-p filename)
1125 (error "%s does not exist" filename))
1126 (let ((tail quail-misc-package-ext-info)
98223b73 1127 coding-system-for-write
c063e381
KH
1128 slot
1129 name title dicfile coding quailfile converter copyright
1130 dicbuf)
1131 (while tail
d1df889e
KH
1132 (setq slot (car tail)
1133 dicfile (nth 2 slot)
1134 quailfile (nth 4 slot))
1135 (when (and (or (string-match dicfile filename)
1136 ;; MS-DOS filesystem truncates file names to 8+3
1137 ;; limits, so "cangjie-table.cns" becomes
1138 ;; "cangjie-.cns", and the above string-match
1139 ;; fails. Give DOS users a chance...
1140 (and (fboundp 'msdos-long-file-names)
1141 (not (msdos-long-file-names))
1142 (string-match (dos-8+3-filename dicfile) filename)))
1143 (if (file-newer-than-file-p
1144 filename (expand-file-name quailfile dirname))
1145 t
1146 (message "%s is up to date" quailfile)
1147 nil))
1148 (setq name (car slot)
c063e381 1149 title (nth 1 slot)
c063e381 1150 coding (nth 3 slot)
c063e381
KH
1151 converter (nth 5 slot)
1152 copyright (nth 6 slot))
1153 (message "Converting %s to %s..." dicfile quailfile)
f9362982
KH
1154 ;; Explicitly set eol format to `unix'.
1155 (setq coding-system-for-write
1156 (coding-system-change-eol-conversion coding 'unix))
c063e381 1157 (with-temp-file (expand-file-name quailfile dirname)
bb5b9e9d
MB
1158 (insert (format ";; Quail package `%s' -*- coding:%s; " name coding))
1159 (insert "byte-compile-disable-print-circle:t; -*-\n")
c063e381
KH
1160 (insert ";; Generated by the command `miscdic-convert'\n")
1161 (insert ";; Date: " (current-time-string) "\n")
1162 (insert ";; Source dictionary file: " dicfile "\n")
1163 (insert ";; Copyright notice of the source file\n")
1164 (insert ";;------------------------------------------------------\n")
1165 (insert copyright "\n")
1166 (insert ";;------------------------------------------------------\n")
1167 (insert "\n")
1168 (insert ";;; Code:\n\n")
1169 (insert "(require 'quail)\n")
1170 (insert "(quail-define-package \"" name "\" \""
98223b73
KH
1171 (if (eq coding 'big5) "Chinese-BIG5"
1172 (if (eq coding 'iso-2022-cn-ext) "Chinese-CNS"
1173 "Chinese-GB"))
c063e381 1174 "\" \"" title "\" t\n")
8b735b2b
KH
1175 (let* ((coding-system-for-read
1176 (coding-system-change-eol-conversion coding 'unix))
c063e381
KH
1177 (dicbuf (find-file-noselect filename)))
1178 (funcall converter dicbuf name title)
1179 (kill-buffer dicbuf)))
1180 (message "Converting %s to %s...done" dicfile quailfile))
1181 (setq tail (cdr tail)))))
1182
(defun batch-miscdic-convert ()
  "Run `miscdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
If there's an argument \"-dir\", the next argument specifies a directory
to store generated Quail packages.
An argument naming a directory converts every non-directory file in it."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-miscdic-convert' should be used only with -batch"))
  (let ((dir default-directory)
        filename)
    (while command-line-args-left
      (when (string= (car command-line-args-left) "-dir")
        (setq command-line-args-left (cdr command-line-args-left))
        ;; A trailing "-dir" used to fail later with an obscure
        ;; wrong-type-argument error; report the real problem instead.
        (or command-line-args-left
            (error "Option `-dir' requires a directory argument"))
        (setq dir (car command-line-args-left)
              command-line-args-left (cdr command-line-args-left)))
      (setq filename (car command-line-args-left)
            command-line-args-left (cdr command-line-args-left))
      (cond
       ;; "-dir DIR" was the last thing on the command line, so there
       ;; is nothing left to convert.
       ((null filename))
       ((file-directory-p filename)
        (dolist (file (directory-files filename t nil t))
          (or (file-directory-p file)
              (miscdic-convert file dir))))
       (t
        (miscdic-convert filename dir)))))
  (kill-emacs 0))
1208
5cdaf2a5
KH
1209;; Local Variables:
1210;; coding: iso-2022-7bit
1211;; End:
60370d40 1212
cbee283d 1213;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3
60370d40 1214;;; titdic-cnv.el ends here