Stephen Berman <stephen.berman at gmx.net>
[bpt/emacs.git] / lisp / international / titdic-cnv.el
CommitLineData
c7211fed 1;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*-
4ed46869 2
d4877ac1 3;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004,
ae940284 4;; 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
ae940284 6;; 2005, 2006, 2007, 2008, 2009
2fd125a3
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
8f924df7
KH
9;; Copyright (C) 2003
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
4ed46869
KH
12
13;; Keywords: Quail, TIT, cxterm
14
15;; This file is part of GNU Emacs.
16
4936186e 17;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 18;; it under the terms of the GNU General Public License as published by
4936186e
GM
19;; the Free Software Foundation, either version 3 of the License, or
20;; (at your option) any later version.
4ed46869
KH
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
4936186e 28;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869 29
60370d40 30;;; Commentary:
4ed46869 31
49ed466f 32;; Convert cxterm dictionary (of TIT format) to quail-package.
4ed46869
KH
33;;
34;; Usage (within Emacs):
49ed466f 35;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR>
4ed46869 36;; Usage (from shell):
49ed466f 37;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\
4ed46869
KH
38;; [-dir DIR] [DIR | FILE] ...
39;;
40;; When you run titdic-convert within Emacs, you have a chance to
41;; modify arguments of `quail-define-package' before saving the
42;; converted file. For instance, you are likely to modify TITLE,
43;; DOCSTRING, and KEY-BINDINGS.
44
49ed466f 45;; Cxterm dictionary file (*.tit) is a line-oriented text (English,
4ed46869
KH
46;; Chinese, Japanese, and Korean) file. The whole file consists of
47;; two parts, the definition part (`header' hereafter) followed by
48;; the dictionary part (`body' hereafter). All lines beginning with
49;; a leading '#' are ignored.
50;;
51;; Each line in the header part has two fields, KEY and VALUE. These
52;; fields are separated by one or more white characters.
53;;
54;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
55;; These fields are separated by one or more white characters.
56;;
57;; See the manual page of `tit2cit' of cxterm distribution for more
58;; detail.
c063e381 59;;
b138056a 60;; Near the end of this file, we also have a few other tools to convert
c063e381 61;; miscellaneous dictionaries.
4ed46869
KH
62
63;;; Code:
64
65(require 'quail)
66
49ed466f 67;; List of values of key "ENCODE:" and the corresponding Emacs
4ed46869
KH
68;; coding-system and language environment name.
(defvar tit-encode-list
  '(("GB" euc-china "Chinese-GB")
    ("BIG5" cn-big5 "Chinese-BIG5")
    ("JIS" euc-japan "Japanese")
    ("KS" euc-kr "Korean"))
  "Alist of values of the TIT key \"ENCODE:\".
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE), where ENCODE
is the string found in the dictionary header, CODING-SYSTEM is the
Emacs coding system used to decode the dictionary, and LANGUAGE is the
language name written into the generated `quail-define-package' call.")
74
4558e816
KH
75;; Alist of input method names and the corresponding title and extra
76;; docstring. For each input method generated from a TIT dictionary,
77;; a docstring is automatically generated from the comments in the
78;; dictionary. The extra docstring in this alist is to add more
79;; information.
80;; The command describe-input-method shows the automatically generated
c7211fed 81;; docstring, then an extra docstring while replacing the form \<VAR>
4558e816
KH
82;; by the value of variable VAR. For instance, the form
83;; \<quail-translation-docstring> is replaced by a description about
84;; how to select a translation from a list of candidates.
85
6b1e079c
KH
;; Each element is (NAME TITLE [EXTRA-DOCSTRING]).  `tit-process-header'
;; looks an element up by the package NAME, uses TITLE (element 1) as the
;; title of the generated package and appends EXTRA-DOCSTRING (element 2,
;; when present) to the docstring generated from the dictionary.
86(defvar quail-cxterm-package-ext-info
87 '(("chinese-4corner" "\e$(0(?-F\e(B")
88 ("chinese-array30" "\e$(0#R#O\e(B")
4558e816
KH
89 ("chinese-ccdospy" "\e$AKuF4\e(B"
90 "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312').
91
c7211fed 92Pinyin is the standard Roman transliteration method for Chinese.
4558e816
KH
93For the detail of Pinyin system, see the documentation of the input
94method `chinese-py'.
95
96This input method works almost the same way as `chinese-py'. The
97difference is that you type a single key for these Pinyin spelling.
98 Pinyin: zh en eng ang ch an ao ai ong sh ing yu(\e$A(9\e(B)
99 keyseq: a f g h i j k l s u y v
c7211fed 100For example:
4558e816
KH
101 Chinese: \e$A0!\e(B \e$A9{\e(B \e$AVP\e(B \e$AND\e(B \e$A9b\e(B \e$ASq\e(B \e$AH+\e(B
102 Pinyin: a guo zhong wen guang yu quan
103 Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6
104
105\\<quail-translation-docstring>
106
107For double-width GB2312 characters correponding to ASCII, use the
108input method `chinese-qj'.")
109
4558e816
KH
110 ("chinese-ecdict" "\e$(05CKH\e(B"
111"In this input method, you enter a Chinese (Big5) charactere or word
112by typing the corresponding English word. For example, if you type
113\"computer\", \"\e$(0IZH+\e(B\" is input.
114
115\\<quail-translation-docstring>")
116
117 ("chinese-etzy" "\e$(06/0D\e(B"
118"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
119`chinese-big5-2').
120
121Zhuyin is a kind of phonetic symbol. One to three Zhuyin symbols
122compose one Chinese character.
123
124In this input method, you enter a Chinese character by first typing
125keys corresponding to Zhuyin symbols (see the above table) followed by
126SPC, 1, 2, 3, or 4 specifing a tone (SPC:\e$(0?v(N\e(B, 1:\e$(0M=Vy\e(B, 2:\e$(0Dm(N\e(B, 3: \e$(0&9Vy\e(B,
1274:\e$(0(+Vy\e(B).
128
129\\<quail-translation-docstring>")
6b1e079c
KH
130
131 ("chinese-punct-b5" "\e$(0O:\e(BB"
132 "Input method for Chinese punctuations and symbols of Big5
133\(`chinese-big5-1' and `chinese-big5-2').")
134
135 ("chinese-punct" "\e$A1j\e(BG"
136 "Input method for Chinese punctuations and symbols of GB2312
137\(`chinese-gb2312').")
138
139 ("chinese-py-b5" "\e$(03<\e(BB"
140 "Pinyin base input method for Chinese Big5 characters
141\(`chinese-big5-1', `chinese-big5-2').
142
143This input method works almost the same way as `chinese-py' (which
144see).
145
146This input method supports only Han characters. The more convenient
43b11fee
EZ
147method is `chinese-py-punct-b5', which is the combination of this
148method and `chinese-punct-b5' and which supports both Han characters
149and punctuation/symbols.
6b1e079c 150
43b11fee 151For double-width Big5 characters corresponding to ASCII, use the input
6b1e079c
KH
152method `chinese-qj-b5'.
153
154The input method `chinese-py' and `chinese-tonepy' are also Pinyin
43b11fee 155based, but for the character set GB2312 (`chinese-gb2312').")
6b1e079c 156
4558e816
KH
157 ("chinese-qj-b5" "\e$(0)A\e(BB")
158
159 ("chinese-qj" "\e$AH+\e(BG")
160
6b1e079c 161 ("chinese-sw" "\e$AJWN2\e(B"
4558e816
KH
162"Radical base input method for Chinese charset GB2312 (`chinese-gb2312').
163
d20faceb
EZ
164In this input method, you enter a Chinese character by typing two
165keys. The first key corresponds to the first (\e$AJW\e(B) radical, the second
166key corresponds to the last (\e$AN2\e(B) radical. The correspondence of keys
167and radicals is as below:
4558e816
KH
168
169 first radical:
170 a b c d e f g h i j k l m n o p q r s t u v w x y z
a1506d29 171 \e$APD\e(B \e$AZ"\e(B \e$AJ,\e(B \e$AX<\e(B \e$A;p\e(B \e$A?Z\e(B \e$A^P\e(B \e$Ac_\e(B \e$AZ%\e(B \e$A\3\e(B \e$AXi\e(B \e$AD>\e(B \e$Alj\e(B \e$Ab;\e(B \e$ATB\e(B \e$Afy\e(B \e$AJ/\e(B \e$AMu\e(B \e$A0K\e(B \e$AX/\e(B \e$AHU\e(B \e$AeA\e(B \e$Aak\e(B \e$AVq\e(B \e$AR;\e(B \e$AHK\e(B
4558e816
KH
172 last radical:
173 a b c d e f g h i j k l m n o p q r s t u v w x y z
a1506d29 174 \e$ASV\e(B \e$AI=\e(B \e$AMA\e(B \e$A56\e(B \e$AZb\e(B \e$A?Z\e(B \e$ARB\e(B \e$Aqb\e(B \e$A4s\e(B \e$A6!\e(B \e$A[L\e(B \e$Ala\e(B \e$AJ.\e(B \e$A4u\e(B \e$AXg\e(B \e$ACE\e(B \e$A=q\e(B \e$AX-\e(B \e$AE.\e(B \e$ARR\e(B \e$A`m\e(B \e$AP!\e(B \e$A3'\e(B \e$A3f\e(B \e$A_.\e(B \e$A27\e(B
4558e816 175
43e5a7fe 176\\<quail-translation-docstring>")
4558e816 177
6b1e079c
KH
178 ("chinese-tonepy" "\e$A5wF4\e(B"
179 "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
180
c3ff164a 181Pinyin is the standard roman transliteration method for Chinese.
d20faceb 182For the details of Pinyin system, see the documentation of the input
6b1e079c
KH
183method `chinese-py'.
184
185This input method works almost the same way as `chinese-py'. The
4558e816
KH
186difference is that you must type 1..5 after each Pinyin spelling to
187specify a tone (1:\e$ARuF=\e(B, 2:\e$AQtF=\e(B, 3:\e$AIOIy\e(B, 4\e$AOBIy\e(B, 5:\e$AGaIy\e(B).
188
43e5a7fe 189\\<quail-translation-docstring>
4558e816
KH
190
191For instance, to input \e$ADc\e(B, you type \"n i 3 3\", the first \"n i\" is
192a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
193the third character from the candidate list.
6b1e079c
KH
194
195For double-width GB2312 characters correponding to ASCII, use the
196input method `chinese-qj'.")
197
4558e816
KH
198 ("chinese-zozy" "\e$(0I\0D\e(B"
199"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
200`chinese-big5-2').
201
d20faceb 202Zhuyin is a kind of a phonetic symbol. One to three Zhuyin symbols
4558e816
KH
203compose a Chinese character.
204
205In this input method, you enter a Chinese character by first typing
206keys corresponding to Zhuyin symbols (see the above table) followed by
207SPC, 6, 3, 4, or 7 specifing a tone (SPC:\e$(0?v(N\e(B, 6:\e$(0Dm(N\e(B, 3:\e$(0&9Vy\e(B, 4:\e$(0(+Vy\e(B,
2087:\e$(0M=Vy\e(B).
209
43e5a7fe 210\\<quail-translation-docstring>")))
4ed46869
KH
211
212;; Return a value of the key in the current line.
(defsubst tit-read-key-value ()
  "Return the value that starts at point in the current line, or nil.
The value is the run of characters up to the next space, tab, carriage
return or newline.  It is read as the contents of a Lisp string
literal, so backslash escapes such as \\010 are converted to the
characters they denote.  Return nil if no such run starts at point."
  (when (looking-at "[^ \t\r\n]+")
    (let ((literal (concat "\"" (match-string 0) "\"")))
      (car (read-from-string literal)))))
216
217;; Return an appropriate quail-package filename from FILENAME (TIT
49ed466f
KH
218;; dictionary filename). For instance, ".../ZOZY.tit" -> "ZOZY.el".
(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name for the TIT dictionary FILENAME.
The last four characters of FILENAME (normally \".tit\") are dropped,
the directory part is removed and \".el\" is appended.  The result is
expanded relative to DIRNAME, or to `default-directory' when DIRNAME
is nil."
  (let* ((stem (file-name-nondirectory (substring filename 0 -4)))
         (package-file (concat stem ".el")))
    (expand-file-name package-file dirname)))
223
1375754c 224;; This value is nil if we are processing phrase dictionary.
(defvar tit-dictionary t
  "Non-nil while a character dictionary is being processed.
This is nil when the TIT file being processed is a phrase dictionary.")

(defvar tit-encode nil
  "Value of the key \"ENCODE:\" of the TIT dictionary being converted.")

(defvar tit-default-encode "GB"
  "Encoding name assumed for a TIT dictionary without an \"ENCODE:\" key.")
228
229;; Generate elements of KEY-BINDINGS arg for `quail-define-package' so
230;; that each character in KEYS invokes FUNCTION-SYMBOL.
(defun tit-generate-key-bindings (keys function-symbol)
  "Print KEY-BINDINGS elements that bind each key of KEYS to FUNCTION-SYMBOL.
KEYS is a string; one element of the form (\"KEY\" . FUNCTION-SYMBOL) is
printed with `princ' for every character of it that qualifies:

- a control character (below SPC) qualifies only if it is bound to
  `quail-execute-non-quail-command' in `quail-translation-keymap', and
  is written as \"\\C-X\";
- any other character qualifies if it is not above 127; DEL (127) is
  written as \"\\C-?\".

Elements are separated by a newline and a space.  Return nil."
  (let ((first t))
    (dolist (key (append keys nil))
      (when (if (< key ?\s)
                (eq (lookup-key quail-translation-keymap
                                (char-to-string key))
                    'quail-execute-non-quail-command)
              (<= key 127))
        ;; Emit the separator only in front of an element that is really
        ;; printed, so that skipped keys leave no stray blank lines.
        (unless first (princ "\n "))
        (princ (cons (cond ((< key ?\s) (format "\"\\C-%c\"" (+ key ?@)))
                           ;; `prin1-to-string' escapes `\"' and `\\', which
                           ;; would otherwise produce an unreadable string
                           ;; literal in the generated package.
                           ((< key 127) (prin1-to-string (char-to-string key)))
                           (t "\"\\C-?\""))
                     function-symbol))
        (setq first nil)))))
251
252;; Analyze header part of TIT dictionary and generate an appropriate
253;; `quail-define-package' function call.
(defun tit-process-header (filename)
  "Print Quail package boilerplate derived from a TIT dictionary header.
FILENAME is the name of the TIT dictionary; the package name is
\"chinese-\" followed by the downcased base name of FILENAME without
its last four characters.

The accessible portion of the current buffer is scanned line by line
from its beginning.  The values of the keywords COMMENT, MULTICHOICE,
MOVERIGHT, MOVELEFT, PROMPT, BACKSPACE and KEYPROMPT are collected, and
every line is echoed as a comment.  All output is printed with `princ'
and `prin1' and ends with a complete `quail-define-package' call.

As a side effect, `tit-dictionary' is set to t by a BEGINDICTIONARY
line and to nil by a BEGINPHRASE line."
  (message "Processing header part...")
  (goto-char (point-min))

  ;; At first, generate header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
                  "chinese-"
                  (substring (downcase (file-name-nondirectory filename))
                             0 -4)))
        ;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package)
    (princ (format "' -*- coding:%s; " coding-system-for-write))
    (princ "byte-compile-disable-print-circle:t; -*-\n")
    (princ ";; Generated by the command `titdic-convert'\n;;\tDate: ")
    (princ (current-time-string))
    (princ "\n;;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n;;; Comment:\n\n")
    (princ ";; Byte-compile this file again after any modification.\n\n")
    (princ ";;; Start of the header of original TIT dictionary.\n\n")

    (while (not (eobp))
      (let ((ch (following-char))
            (pos (point)))
        (cond ((= ch ?C)                ; COMMENT
               (when (looking-at "COMMENT")
                 (let ((pos (match-end 0)))
                   (end-of-line)
                   (setq tit-comments
                         (cons (buffer-substring-no-properties pos (point))
                               tit-comments)))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (when (looking-at "PROMPT:[ \t]*")
                 (goto-char (match-end 0))
                 ;; A PROMPT line without a value yields nil; treat it
                 ;; as an empty prompt so the string operations below
                 ;; stay valid.
                 (setq tit-prompt (or (tit-read-key-value) ""))
                 ;; Some TIT dictionaries that are encoded by
                 ;; euc-china contain an invalid character at the tail.
                 ;; There is a last character to inspect only when the
                 ;; prompt is not empty.
                 (when (> (length tit-prompt) 0)
                   (let* ((tail-char (aref tit-prompt
                                           (1- (length tit-prompt))))
                          (split (split-char tail-char)))
                     (if (or (eq (nth 1 split) 32)
                             (eq (nth 2 split) 32))
                         (setq tit-prompt (substring tit-prompt 0 -1)))))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-dictionary t))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-dictionary nil))))
              ((= ch ?K)                ; KEYPROMPT
               (when (looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                 (let ((key-char (match-string 1)))
                   (goto-char (match-end 0))
                   (if (string-match "\\\\[0-9]+" key-char)
                       (setq key-char
                             (car (read-from-string (format "\"%s\""
                                                            key-char)))))
                   (setq tit-keyprompt
                         (cons (cons key-char (tit-read-key-value))
                               tit-keyprompt))))))
        ;; Echo the whole original line as a comment.
        (end-of-line)
        (princ ";; ")
        (princ (buffer-substring-no-properties pos (point)))
        (princ "\n")
        (forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (nth 1 (assoc package quail-cxterm-package-ext-info))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      (princ (or title
                 (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
                     (substring tit-prompt (match-beginning 1) (match-end 1))
                   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    ;; Walk the list without consuming `tit-keyprompt': its value is
    ;; tested again below to decide KBD-TRANSLATE and SHOW-LAYOUT.
    (if tit-keyprompt
        (progn
          (princ " '(")
          (dolist (elt tit-keyprompt)
            (princ " ")
            (princ (format "(%d . \"%s\")\n"
                           (string-to-char (car elt))
                           (cdr elt))))
          (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (let ((doc (concat tit-prompt "\n"))
          (comments (if tit-comments
                        (mapconcat 'identity (nreverse tit-comments) "\n")))
          (doc-ext (nth 2 (assoc package quail-cxterm-package-ext-info))))
      (if comments
          (setq doc (concat doc "\n" comments "\n")))
      (if doc-ext
          (setq doc (concat doc "\n" doc-ext "\n")))
      (prin1 doc)
      (terpri))

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
401
(defsubst tit-flush-translations (key translations)
  "Print one translation rule for KEY and TRANSLATIONS, then a newline.
Every backslash followed by three digits in KEY is replaced by the
character that the escape denotes when read inside a Lisp string.
The rule is printed with `prin1' as the list (KEY TRANSLATIONS).  When
`tit-dictionary' is nil, TRANSLATIONS is a list in reverse order and is
printed as a vector in the original order."
  (let ((start 0)
        (pieces nil))
    ;; Collect, in reverse, the text between escapes and the decoded
    ;; escapes themselves.
    (while (string-match "\\\\[0-9][0-9][0-9]" key start)
      (push (substring key start (match-beginning 0)) pieces)
      (push (car (read-from-string
                  (concat "\"" (match-string 0 key) "\"")))
            pieces)
      (setq start (match-end 0)))
    ;; PIECES is non-nil only if at least one escape was found.
    (when pieces
      (push (substring key start) pieces)
      (setq key (apply 'concat (nreverse pieces)))))
  (prin1 (list key
               (if tit-dictionary
                   translations
                 (vconcat (nreverse translations)))))
  (princ "\n"))
4ed46869
KH
419
420;; Convert body part of TIT dictionary into `quail-define-rules'
421;; function call.
(defun tit-process-body ()
  "Print a `quail-define-rules' call for the dictionary body after point.
The current buffer is scanned from point to its end.  Lines that start
with `#' or are empty are skipped, as are entries that have a key but
no translation.  Translations of consecutive lines with the same key
are accumulated and printed as one rule by `tit-flush-translations'.

When `tit-dictionary' is non-nil, the first whitespace-delimited field
after the key is appended to a string of translations.  Otherwise every
field up to the end of the line or a `#' is added to a list of phrases."
  (message "Formatting translation rules...")
  (let ((prev-key "")
        ch key translations pos)
    (princ "(quail-define-rules\n")
    (while (not (eobp))
      (setq ch (following-char))
      (if (or (= ch ?#) (= ch ?\n))
          ;; Comment line or empty line.
          (forward-line 1)
        ;; Read KEYSEQ.
        (setq pos (point))
        (skip-chars-forward "^ \t\n")
        (setq key (buffer-substring-no-properties pos (point)))
        (skip-chars-forward " \t")
        (setq ch (following-char))
        (if (or (= ch ?#) (= ch ?\n))
            ;; This entry contains no translations.  Let's ignore it.
            (forward-line 1)
          ;; A new key starts: print what was gathered for the old one.
          (unless (string= key prev-key)
            (when translations
              (tit-flush-translations prev-key translations))
            (setq translations nil
                  prev-key key))
          (if tit-dictionary
              ;; Character dictionary: append to the translation string.
              (progn
                (setq pos (point))
                (skip-chars-forward "^ \t#\n")
                (setq translations
                      (concat translations
                              (buffer-substring-no-properties pos (point)))))
            ;; Phrase dictionary: collect each phrase, newest first.
            (while (not (eolp))
              (setq pos (point))
              (skip-chars-forward "^ \t\n")
              (push (buffer-substring-no-properties pos (point))
                    translations)
              (skip-chars-forward " \t")
              (setq ch (following-char))
              ;; The rest of the line is a comment.
              (if (= ch ?#) (end-of-line))))
          (forward-line 1))))

    ;; Print the rule of the last key.
    (when translations
      (tit-flush-translations prev-key translations))
    (princ ")\n")))
4ed46869
KH
470
471;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Generate a Quail package from the TIT dictionary file FILENAME.
The package is written to the file whose name is computed by
`tit-make-quail-package-file-name'.  If the optional argument DIRNAME
is specified, the generated package is saved under that directory."
  (interactive "FTIT dictionary file: ")
  (let ((coding-system-for-write nil))
    (with-temp-file (tit-make-quail-package-file-name filename dirname)
      ;; Everything printed below goes into the package file buffer.
      (let ((standard-output (current-buffer)))
        (with-temp-buffer
          (set-buffer-multibyte nil)
          ;; Here we must use `raw-text' instead of `no-conversion' to
          ;; enable auto-decoding of eol format (CRLF->LF).
          (let ((coding-system-for-read 'raw-text))
            (insert-file-contents (expand-file-name filename)))

          ;; Decode the buffer contents from the encoding specified by
          ;; the value of the key "ENCODE:", which is searched for only
          ;; in front of the body part.
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary doesn't have body part"))
          (let ((header-end (point)))
            (goto-char (point-min))
            (setq tit-encode
                  (if (re-search-forward "^ENCODE:[ \t]*" header-end t)
                      (progn
                        (goto-char (match-end 0))
                        (tit-read-key-value))
                    tit-default-encode))
            (let* ((entry (or (assoc tit-encode tit-encode-list)
                              (error "Invalid ENCODE: value in TIT dictionary")))
                   (coding (nth 1 entry)))
              (message "Decoding with coding system %s..." coding)
              (goto-char (point-min))
              (decode-coding-region (point-min) (point-max) coding)
              ;; Explicitly set eol format to `unix'.
              (setq coding-system-for-write
                    (coding-system-change-eol-conversion coding 'unix))
              (remove-text-properties (point-min) (point-max)
                                      '(charset nil))))

          (set-buffer-multibyte t)
          ;; Move point to the starting position of the body part.
          (goto-char (point-min))
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary can't be decoded correctly"))

          ;; Process the header part, which ends with the BEGIN line.
          (forward-line 1)
          (narrow-to-region (point-min) (point))
          (tit-process-header filename)
          (widen)

          ;; Process the body part.
          (tit-process-body))))))
4ed46869
KH
525
526;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
Each remaining argument is a TIT dictionary file or a directory, in
which case all files in it whose names end in \".tit\" are converted.
A file is converted only if it is newer than its Quail package file,
unless FORCE is non-nil.  A leading \"-dir DIR\" makes the packages be
saved under DIR.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
generate Quail package file \"xxx.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".
Emacs is killed when this function is done."
  (defvar command-line-args-left)       ; Avoid compiler warning.
  (unless noninteractive
    (error "`batch-titdic-convert' should be used only with -batch"))
  (if (string= (car command-line-args-left) "-h")
      (progn
        (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
        (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
        (message "To convert XXX.tit into DIR/xxx.el:")
        (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
    (let (targetdir filename files)
      ;; An optional leading "-dir DIR" selects the output directory.
      (when (string= (car command-line-args-left) "-dir")
        (setq command-line-args-left (cdr command-line-args-left))
        (setq targetdir (car command-line-args-left))
        (setq command-line-args-left (cdr command-line-args-left)))
      (while command-line-args-left
        (setq filename (expand-file-name (car command-line-args-left)))
        (setq files
              (if (file-directory-p filename)
                  (progn
                    (message "Converting all tit files in the directory %s"
                             filename)
                    (directory-files filename t "\\.tit$"))
                (list filename)))
        (dolist (elt files)
          (let ((file (expand-file-name elt)))
            ;; Skip a dictionary whose package is already up to date.
            (when (or force
                      (file-newer-than-file-p
                       file
                       (tit-make-quail-package-file-name file targetdir)))
              (message "Converting %s to quail-package..." file)
              (titdic-convert file targetdir))))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el")))
  (kill-emacs 0))
568
c063e381
KH
569\f
570;;; Converter of miscellaneous dictionaries other than TIT format.
571
572;; Alist of input method names and the corresponding information.
573;; Each element has this form:
574;; (INPUT-METHOD-NAME ;; Name of the input method.
a1506d29 575;; INPUT-METHOD-TITLE ;; Title string of the input method
c063e381
KH
576;; DICFILE ;; Name of the source dictionary file.
577;; CODING ;; Coding system of the dictionary file.
578;; QUAILFILE ;; Name of the Quail package file.
579;; CONVERTER ;; Function to generate the Quail package.
580;; COPYRIGHT-NOTICE ;; Copyright notice of the source dictionary.
581;; )
582
;; Each element below lists, in this order: the input method name, its
;; title string, the source dictionary file, the coding system of that
;; file, the Quail package file, the converter function, and the
;; copyright notice of the source dictionary as a string of comment lines.
583(defvar quail-misc-package-ext-info
584 '(("chinese-b5-tsangchi" "\e$(06A\e(BB"
a1506d29 585 "cangjie-table.b5" big5 "tsang-b5.el"
c063e381
KH
586 tsang-b5-converter
587 "\
588;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
589;; #
590;; # Permission to copy and distribute both modified and
591;; # unmodified versions is granted without royalty provided
592;; # this notice is preserved.")
593
594 ("chinese-b5-quick" "\e$(0X|\e(BB"
a1506d29 595 "cangjie-table.b5" big5 "quick-b5.el"
c063e381
KH
596 quick-b5-converter
597 "\
598;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
599;; #
600;; # Permission to copy and distribute both modified and
601;; # unmodified versions is granted without royalty provided
602;; # this notice is preserved.")
603
604 ("chinese-cns-tsangchi" "\e$(GT?\e(BC"
605 "cangjie-table.cns" iso-2022-cn-ext "tsang-cns.el"
606 tsang-cns-converter
607 "\
608;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
609;; #
610;; # Permission to copy and distribute both modified and
611;; # unmodified versions is granted without royalty provided
612;; # this notice is preserved.")
613
614 ("chinese-cns-quick" "\e$(Gv|\e(BC"
615 "cangjie-table.cns" iso-2022-cn-ext "quick-cns.el"
616 quick-cns-converter
617 "\
618;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
619;; #
620;; # Permission to copy and distribute both modified and
621;; # unmodified versions is granted without royalty provided
622;; # this notice is preserved.")
623
624 ("chinese-py" "\e$AF4\e(BG"
625 "pinyin.map" cn-gb-2312 "PY.el"
626 py-converter
627 "\
628;; \"pinyin.map\" is included in a free package called CCE. It is
629;; available at:
630;; http://ftp.debian.org/debian/dists/potato/main
631;; /source/utils/cce_0.36.orig.tar.gz
632;; This package contains the following copyright notice.
633;;
634;;
635;; Copyright (C) 1999, Rui He, herui@cs.duke.edu
a1506d29
JB
636;;
637;;
c063e381 638;; CCE(Console Chinese Environment) 0.32
a1506d29
JB
639;;
640;; CCE is free software; you can redistribute it and/or modify it under the
641;; terms of the GNU General Public License as published by the Free Software
642;; Foundation; either version 1, or (at your option) any later version.
643;;
644;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
645;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
646;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
647;; details.
648;;
c063e381 649;; You should have received a copy of the GNU General Public License along with
a1506d29 650;; CCE; see the file COPYING. If not, write to the Free Software Foundation,
3ef97fb6 651;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
c063e381
KH
652
653 ("chinese-ziranma" "\e$AWTH;\e(B"
654 "ziranma.cin" cn-gb-2312 "ZIRANMA.el"
655 ziranma-converter
656 "\
657;; \"ziranma.cin\" is included in a free package called CCE. It is
658;; available at:
659;; http://ftp.debian.org/debian/dists/potato/main
660;; /source/utils/cce_0.36.orig.tar.gz
661;; This package contains the following copyright notice.
662;;
663;;
664;; Copyright (C) 1999, Rui He, herui@cs.duke.edu
a1506d29
JB
665;;
666;;
c063e381 667;; CCE(Console Chinese Environment) 0.32
a1506d29
JB
668;;
669;; CCE is free software; you can redistribute it and/or modify it under the
670;; terms of the GNU General Public License as published by the Free Software
671;; Foundation; either version 1, or (at your option) any later version.
672;;
673;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
674;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
675;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
676;; details.
677;;
c063e381 678;; You should have received a copy of the GNU General Public License along with
a1506d29 679;; CCE; see the file COPYING. If not, write to the Free Software Foundation,
3ef97fb6 680;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
a4c4011b 681
a4c4011b
AC
682 ("chinese-ctlau" "\e$AAuTA\e(B"
683 "CTLau.html" cn-gb-2312 "CTLau.el"
684 ctlau-gb-converter
685 "\
686;; \"CTLau.html\" is available at:
687;;
688;; http://umunhum.stanford.edu/~lee/chicomp/CTLau.html
689;;
690;; It contains the following copyright notice:
691;;
692;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu)
a1506d29 693;; #
a4c4011b
AC
694;; # This program is free software; you can redistribute it and/or
695;; # modify it under the terms of the GNU General Public License
696;; # as published by the Free Software Foundation; either version 2
697;; # of the License, or any later version.
a1506d29 698;; #
a4c4011b
AC
699;; # This program is distributed in the hope that it will be useful,
700;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
701;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
702;; # GNU General Public License for more details.
a1506d29 703;; #
a4c4011b
AC
704;; # You should have received a copy of the GNU General Public License
705;; # along with this program; if not, write to the Free Software Foundation,
086add15 706;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
a4c4011b 707
e089be2d 708 ("chinese-ctlaub" "\e$(0N,Gn\e(B"
2bef0948 709 "CTLau-b5.html" big5 "CTLau-b5.el"
a4c4011b
AC
710 ctlau-b5-converter
711 "\
712;; \"CTLau-b5.html\" is available at:
713;;
714;; http://umunhum.stanford.edu/~lee/chicomp/CTLau-b5.html
715;;
716;; It contains the following copyright notice:
717;;
718;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu)
a1506d29 719;; #
a4c4011b
AC
720;; # This program is free software; you can redistribute it and/or
721;; # modify it under the terms of the GNU General Public License
722;; # as published by the Free Software Foundation; either version 2
723;; # of the License, or any later version.
a1506d29 724;; #
a4c4011b
AC
725;; # This program is distributed in the hope that it will be useful,
726;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
727;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
728;; # GNU General Public License for more details.
a1506d29 729;; #
a4c4011b
AC
730;; # You should have received a copy of the GNU General Public License
731;; # along with this program; if not, write to the Free Software Foundation,
086add15 732;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
c063e381
KH
733 ))
734
735;; Generate a code of a Quail package in the current buffer from Tsang
736;; dictionary in the buffer DICBUF. The input method name of the
737;; Quail package is NAME, and the title string is TITLE.
738
739;; If TSANG-P is non-nil, generate \e$(06AQo\e(B input method. Otherwise
740;; generate \e$(0X|/y\e(B (simple version of \e$(06AQo\e(B). If BIG5-P is non-nil, the
741;; input method is for inputting Big5 characters. Otherwise the input
742;; method is for inputting CNS characters.
743
744(defun tsang-quick-converter (dicbuf name title tsang-p big5-p)
745 (let ((fulltitle (if tsang-p (if big5-p "\e$(06AQo\e(B" "\e$(GT?on\e(B")
746 (if big5-p "\e$(0X|/y\e(B" "\e$(Gv|Mx\e(B")))
747 dic)
748 (goto-char (point-max))
749 (if big5-p
750 (insert (format "\"\e$(0&d'GTT&,!J\e(B%s\e$(0!K\e(BBIG5
751
752 \e$(0KHM$\e(B%s\e$(0TT&,WoOu\e(B
753
754 [Q \e$(0'D\e(B] [W \e$(0(q\e(B] [E \e$(0'V\e(B] [R \e$(0&H\e(B] [T \e$(0'>\e(B] [Y \e$(0&4\e(B] [U \e$(0&U\e(B] [I \e$(0'B\e(B] [O \e$(0&*\e(B] [P \e$(0'A\e(B]
755
756 [A \e$(0'K\e(B] [S \e$(0&T\e(B] [D \e$(0'N\e(B] [F \e$(0'W\e(B] [G \e$(0&I\e(B] [H \e$(0*M\e(B] [J \e$(0&3\e(B] [L \e$(0&d\e(B]
a1506d29 757
c063e381
KH
758 [Z ] [X \e$(0[E\e(B] [C \e$(01[\e(B] [V \e$(0&M\e(B] [B \e$(0'M\e(B] [N \e$(0&_\e(B] [M \e$(0&"\e(B]
759
760\\\\<quail-translation-docstring>\"\n"
761 fulltitle fulltitle))
762 (insert (format "\"\e$(GDcEFrSD+!J\e(B%s\e$(G!K\e(BCNS
763
764 \e$(GiGk#\e(B%s\e$(GrSD+uomu\e(B
765
766 [Q \e$(GEC\e(B] [W \e$(GFp\e(B] [E \e$(GEU\e(B] [R \e$(GDG\e(B] [T \e$(GE=\e(B] [Y \e$(GD3\e(B] [U \e$(GDT\e(B] [I \e$(GEA\e(B] [O \e$(GD)\e(B] [P \e$(GE@\e(B]
767
768 [A \e$(GEJ\e(B] [S \e$(GDS\e(B] [D \e$(GEM\e(B] [F \e$(GEV\e(B] [G \e$(GDH\e(B] [H \e$(GHL\e(B] [J \e$(GD2\e(B] [L \e$(GDc\e(B]
a1506d29
JB
769
770 [Z ] [X \e$(GyE\e(B] [C \e$(GOZ\e(B] [V \e$(GDL\e(B] [B \e$(GEL\e(B] [N \e$(GD^\e(B] [M \e$(GD!\e(B]
c063e381
KH
771
772\\\\<quail-translation-docstring>\"\n"
773 fulltitle fulltitle)))
774 (insert " '((\".\" . quail-next-translation-block)
775 (\",\" . quail-prev-translation-block))
776 nil nil)\n\n")
777 (insert "(quail-define-rules\n")
778 (save-excursion
779 (set-buffer dicbuf)
6b61353c
KH
780 ;; Handle double CR line ends, which result when checking out of
781 ;; CVS on MS-Windows.
782 (goto-char (point-min))
c063e381
KH
783 (search-forward "A440")
784 (beginning-of-line)
785 (let ((table (make-hash-table :test 'equal))
786 val)
787 (while (not (eobp))
788 (forward-char 5)
789 (let ((trans (char-to-string (following-char)))
790 key slot)
8b735b2b 791 (re-search-forward "\\([A-Z]+\\)\r*$" nil t)
c063e381
KH
792 (setq key (downcase
793 (if (or tsang-p
8b735b2b
KH
794 (<= (- (match-end 1) (match-beginning 1)) 1))
795 (match-string 1)
796 (string (char-after (match-beginning 1))
797 (char-after (1- (match-end 1)))))))
c063e381
KH
798 (setq val (gethash key table))
799 (if val (setq trans (concat val trans)))
800 (puthash key trans table)
801 (forward-line 1)))
802 (maphash #'(lambda (key val) (setq dic (cons (cons key val) dic)))
803 table)))
804 (setq dic (sort dic (function (lambda (x y) (string< (car x ) (car y))))))
805 (dolist (elt dic)
806 (insert (format "(%S\t%S)\n" (car elt) (cdr elt))))
807 (let ((punctuations '((";" "\e$(0!'!2!"!#!.!/\e(B" "\e$(G!'!2!"!#!.!/\e(B")
808 (":" "\e$(0!(!+!3!%!$!&!0!1\e(B" "\e$(G!(!+!3!%!$!&!0!1\e(B")
809 ("'" "\e$(0!e!d\e(B" "\e$(G!e!d\e(B")
810 ("\"" "\e$(0!g!f!h!i!q\e(B" "\e$(G!g!f!h!i!q\e(B")
811 ("\\" "\e$(0"`"b#M\e(B" "\e$(G"`"b#M\e(B")
812 ("|" "\e$(0!6!8!:"^\e(B" "\e$(G!6!8!:"^\e(B")
813 ("/" "\e$(0"_"a#L\e(B" "\e$(G"_"a#L\e(B")
814 ("?" "\e$(0!)!4\e(B" "\e$(G!)!4\e(B")
815 ("<" "\e$(0!R"6"A!T"H\e(B" "\e$(G!R"6"A!T"H\e(B")
816 (">" "\e$(0!S"7"B!U\e(B" "\e$(G!S"7"B!U\e(B")
817 ("[" "\e$(0!F!J!b!H!L!V!Z!X!\\e(B" "\e$(G!F!J!b!H!L!V!Z!X!\\e(B")
818 ("]" "\e$(0!G!K!c!I!M!W![!Y!]\e(B" "\e$(G!G!K!c!I!M!W![!Y!]\e(B")
819 ("{" "\e$(0!B!`!D\e(B " "\e$(G!B!`!D\e(B ")
820 ("}" "\e$(0!C!a!E\e(B" "\e$(G!C!a!E\e(B")
821 ("`" "\e$(0!j!k\e(B" "\e$(G!j!k\e(B")
822 ("~" "\e$(0"D"+",!<!=\e(B" "\e$(G"D"+",!<!=\e(B")
823 ("!" "\e$(0!*!5\e(B" "\e$(G!*!5\e(B")
824 ("@" "\e$(0"i"n\e(B" "\e$(G"i"n\e(B")
825 ("#" "\e$(0!l"-\e(B" "\e$(G!l"-\e(B")
826 ("$" "\e$(0"c"l\e(B" "\e$(G"c"l\e(B")
827 ("%" "\e$(0"h"m\e(B" "\e$(G"h"m\e(B")
828 ("&" "\e$(0!m".\e(B" "\e$(G!m".\e(B")
829 ("*" "\e$(0!n"/!o!w!x\e(B" "\e$(G!n"/!o!w!x\e(B")
830 ("(" "\e$(0!>!^!@\e(B" "\e$(G!>!^!@\e(B")
831 (")" "\e$(0!?!_!A\e(B" "\e$(G!?!_!A\e(B")
832 ("-" "\e$(0!7!9"#"$"1"@\e(B" "\e$(G!7!9"#"$"1"@\e(B")
833 ("_" "\e$(0"%"&\e(B" "\e$(G"%"&\e(B")
834 ("=" "\e$(0"8"C\e(B" "\e$(G"8"C\e(B")
835 ("+" "\e$(0"0"?\e(B" "\e$(G"0"?\e(B"))))
836 (dolist (elt punctuations)
837 (insert (format "(%S %S)\n" (concat "z" (car elt))
838 (if big5-p (nth 1 elt) (nth 2 elt))))))
839 (insert ")\n")))
840
(defun tsang-b5-converter (dicbuf name title)
  "Generate the Tsang input method for Big5 from the dictionary in DICBUF.
DICBUF, NAME and TITLE are handed unchanged to `tsang-quick-converter',
which is called with both TSANG-P and BIG5-P non-nil."
  (tsang-quick-converter dicbuf name title
                         t    ; TSANG-P
                         t))  ; BIG5-P
843
(defun quick-b5-converter (dicbuf name title)
  "Generate the simple (quick) input method for Big5 from DICBUF.
DICBUF, NAME and TITLE are handed unchanged to `tsang-quick-converter',
which is called with TSANG-P nil and BIG5-P non-nil."
  (tsang-quick-converter dicbuf name title
                         nil  ; TSANG-P
                         t))  ; BIG5-P
846
(defun tsang-cns-converter (dicbuf name title)
  "Generate the Tsang input method for CNS from the dictionary in DICBUF.
DICBUF, NAME and TITLE are handed unchanged to `tsang-quick-converter',
which is called with TSANG-P non-nil and BIG5-P nil."
  (tsang-quick-converter dicbuf name title
                         t      ; TSANG-P
                         nil))  ; BIG5-P
849
(defun quick-cns-converter (dicbuf name title)
  "Generate the simple (quick) input method for CNS from DICBUF.
DICBUF, NAME and TITLE are handed unchanged to `tsang-quick-converter',
which is called with both TSANG-P and BIG5-P nil."
  (tsang-quick-converter dicbuf name title
                         nil    ; TSANG-P
                         nil))  ; BIG5-P
852
853;; Generate the code for a Quail package in the current buffer from the
854;; Pinyin dictionary in the buffer DICBUF. The input method name of
855;; the Quail package is NAME, and the title string is TITLE.
856
;; NOTE(review): NAME and TITLE are accepted but never referenced in
;; the body below.
857(defun py-converter (dicbuf name title)
 ;; Everything is appended at the end of the current buffer.
858 (goto-char (point-max))
 ;; Insert the docstring, printed with `%S' so that it is written out
 ;; as a quoted Lisp string.
859 (insert (format "%S\n" "\e$A::WVJdHk!KF4Rt!K\e(B
860
861 \e$AF4Rt7=08\e(B
862
863 \e$AP!P4S"NDWVD84z1m!8F4Rt!97{:E#,\e(B \"u(yu) \e$ATrSC\e(B u: \e$A1mJ>!C\e(B
864
865Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
866
c3ff164a 867Pinyin is the standard roman transliteration method for Chinese.
c063e381
KH
868Pinyin uses a sequence of Latin alphabetic characters for each Chinese
869character. The sequence is made by the combination of the initials
870\(the beginning sounds) and finals (the ending sounds).
871
872 initials: b p m f d t n l z c s zh ch sh r j q x g k h
873 finals: a o e i er ai ei oa ou an en ang eng ong i ia iao ie iu ian in
874 iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun
875
876 (Note: In the correct Pinyin writing, the sequence \"yu\" in the last
877 four finals should be written by the character u-umlaut `\e$A(9\e(B'.)
878
879With this input method, you enter a Chinese character by first
880entering its pinyin spelling.
881
882\\<quail-translation-docstring>
883
884For instance, to input \e$ADc\e(B, you type \"n i C-n 3\". The first \"n i\"
885is a Pinyin, \"C-n\" selects the next group of candidates (each group
886contains at most 10 characters), \"3\" select the third character in
887that group.
888
889This input method supports only Han characters. The related input
890method `chinese-py-punct' is the combination of this method and
891`chinese-punct'; it supports both Han characters and punctuation
892characters.
893
894For double-width GB2312 characters corresponding to ASCII, use the
895input method `chinese-qj'.
896
897The correct Pinyin system specifies tones by diacritical marks, but
898this input method doesn't use them, which results in easy (you don't
899have to know the exact tones), but verbose (many characters are assigned
900to the same key sequence) input. You may also want to try the input
901method `chinese-tonepy' with which you must specify tones by digits
902\(1..5)."))
 ;; Insert the key bindings followed by `nil nil nil nil)'; the
 ;; closing paren presumably ends the `quail-define-package' form
 ;; that the caller started.
903 (insert " '((\"\C-?\" . quail-delete-last-char)
904 (\".\" . quail-next-translation)
905 (\">\" . quail-next-translation)
906 (\",\" . quail-prev-translation)
907 (\"<\" . quail-prev-translation))
908 nil nil nil nil)\n\n")
909 (insert "(quail-define-rules\n")
 ;; Copy the whole of DICBUF in at POS, then rewrite that copy in place.
910 (let ((pos (point)))
8b735b2b 911 (insert-buffer-substring-no-properties dicbuf)
c063e381 912 (goto-char pos)
 ;; Drop everything before the first line matching "^[a-z]".
8c8d1081 913 (re-search-forward "^[a-z]")
7d15a839
KH
914 (beginning-of-line)
915 (delete-region pos (point))
c063e381
KH
 ;; Turn each remaining line into a rule of the form ("KEY" "REST").
916 (while (not (eobp))
917 (insert "(\"")
918 (skip-chars-forward "a-z")
919 (insert "\" \"")
 ;; Remove the single character that followed the key.
920 (delete-char 1)
921 (end-of-line)
 ;; Strip any carriage returns left at the end of the line.
8b735b2b
KH
922 (while (= (preceding-char) ?\r)
923 (delete-char -1))
c063e381
KH
924 (insert "\")")
925 (forward-line 1)))
 ;; Close the `quail-define-rules' form opened above.
926 (insert ")\n"))
927
928;; Generate the code for a Quail package in the current buffer from the
929;; Ziranma dictionary in the buffer DICBUF. The input method name of
930;; the Quail package is NAME, and the title string is TITLE.
931
;; NOTE(review): NAME and TITLE are accepted but never referenced in
;; the body below.
932(defun ziranma-converter (dicbuf name title)
933 (let (dic)
 ;; Read DICBUF and build DIC, a list of (KEY . TRANSLATION) pairs.
934 (save-excursion
935 (set-buffer dicbuf)
936 (goto-char (point-min))
 ;; Entries start on the line after the "%keyname end" line.
8b735b2b
KH
937 (search-forward "\n%keyname end")
938 (forward-line 1)
c063e381
KH
 ;; NOTE(review): ELT is bound in this `let' but never used in it.
939 (let ((table (make-hash-table :test 'equal))
940 elt pos key trans val)
941 (while (not (eobp))
 ;; KEY is the text up to the first space or tab.
942 (setq pos (point))
943 (skip-chars-forward "^ \t")
8b735b2b 944 (setq key (buffer-substring-no-properties pos (point)))
c063e381 945 (skip-chars-forward " \t")
 ;; The translation is the rest of the line, up to any CR or LF,
 ;; wrapped in a one-element vector.
8b735b2b
KH
946 (setq pos (point))
947 (skip-chars-forward "^\r\n")
948 (setq trans (vector (buffer-substring-no-properties pos (point))))
c063e381
KH
 ;; Append to the vector already stored under KEY, if any.
949 (setq val (gethash key table))
950 (if val (setq trans (vconcat val trans)))
951 (puthash key trans table)
952 (forward-line 1))
 ;; Simplify each entry while copying it into DIC: a lone
 ;; one-character translation becomes that string, and a vector
 ;; whose elements are all one character long is joined into a
 ;; single string. Anything else stays a vector.
953 (maphash #'(lambda (key trans)
954 (let ((len (length trans))
955 i)
956 (if (and (= len 1) (= (length (aref trans 0)) 1))
957 (setq trans (aref trans 0))
958 (setq i 0)
959 (while (and (< i len)
960 (= (length (aref trans i)) 1))
961 (setq i (1+ i)))
962 (if (= i len)
963 (setq trans (mapconcat 'identity trans "")))))
964 (setq dic (cons (cons key trans) dic)))
965 table)))
 ;; Sort the entries by key in `string<' order.
966 (setq dic (sort dic (function (lambda (x y) (string< (car x) (car y))))))
 ;; Everything is appended at the end of the current buffer.
967 (goto-char (point-max))
 ;; Insert the docstring, printed with `%S' so that it is written out
 ;; as a quoted Lisp string.
968 (insert (format "%S\n" "\e$A::WVJdHk!K!>WTH;!?!K\e(B
969
970 \e$A<|EL6TUU1m\e(B:
971 \e$A)3)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)7\e(B
972 \e$A)'#Q\e(B \e$A)'#W\e(B \e$A)'#E\e(B \e$A)'#R\e(B \e$A)'#T\e(B \e$A)'#Y\e(B \e$A)'#U\e(Bsh\e$A)'#I\e(Bch\e$A)'#O\e(B \e$A)'#P\e(B \e$A)'\e(B
973 \e$A)'\e(B iu\e$A)'\e(B ua\e$A)'\e(B e\e$A)'\e(B uan\e$A)'\e(B ue\e$A)'\e(B uai\e$A)'\e(B u\e$A)'\e(B i\e$A)'\e(B o\e$A)'\e(B un\e$A)'\e(B
974 \e$A)'\e(B \e$A)'\e(B ia\e$A)'\e(B \e$A)'\e(B van\e$A)'\e(B ve\e$A)'\e(B ing\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B uo\e$A)'\e(B vn\e$A)'\e(B
975 \e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)?\e(B
976 \e$A)'#A\e(B \e$A)'#S\e(B \e$A)'#D\e(B \e$A)'#F\e(B \e$A)'#G\e(B \e$A)'#H\e(B \e$A)'#J\e(B \e$A)'#K\e(B \e$A)'#L\e(B \e$A)'\e(B
977 \e$A)'\e(B a\e$A)'\e(Biong\e$A)'\e(Buang\e$A)'\e(B en\e$A)'\e(B eng\e$A)'\e(B ang\e$A)'\e(B an\e$A)'\e(B ao\e$A)'\e(B ai\e$A)'\e(B
978 \e$A)'\e(B \e$A)'\e(B ong\e$A)'\e(Biang\e$A)'\e(B \e$A)'\e(B ng\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
979 \e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)%)7\e(B
980 \e$A)'#Z\e(B \e$A)'#X\e(B \e$A)'#C\e(B \e$A)'#V\e(Bzh\e$A)'#B\e(B \e$A)'#N\e(B \e$A)'#M\e(B \e$A)'#,\e(B \e$A)'#.\e(B \e$A)'\e(B \e$A#/\e(B \e$A)'\e(B
981 \e$A)'\e(B ei\e$A)'\e(B ie\e$A)'\e(B iao\e$A)'\e(B ui\e$A)'\e(B ou\e$A)'\e(B in\e$A)'\e(B ian\e$A)'G0R3)':sR3)'7{:E)'\e(B
982 \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B v\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
983 \e$A);)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)?\e(B
984
985
986Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').
987
988Pinyin is the standard roman transliteration method for Chinese.
989For the details of Pinyin system, see the documentation of the input
990method `chinese-py'.
991
992Unlike the standard spelling of Pinyin, in this input method all
993initials and finals are assigned to single keys (see the above table).
994For instance, the initial \"ch\" is assigned to the key `i', the final
995\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and \e$AGaIy\e(B are
996assigned to the keys `q', `w', `e', `r', `t' respectively.
997
998\\<quail-translation-docstring>
999
1000To input one-letter words, you type 4 keys, the first two for the
1001Pinyin of the letter, next one for tone, and the last one is always a
1002quote ('). For instance, \"vsq'\" input \e$AVP\e(B. Exceptions are these
1003letters. You can input them just by typing a single key.
1004
1005 Character: \e$A04\e(B \e$A2;\e(B \e$A4N\e(B \e$A5D\e(B \e$A6~\e(B \e$A7"\e(B \e$A8v\e(B \e$A:M\e(B \e$A3v\e(B \e$A<0\e(B \e$A?I\e(B \e$AAK\e(B \e$AC;\e(B
1006 Key: a b c d e f g h i j k l m
1007 Character: \e$ADc\e(B \e$AE7\e(B \e$AF,\e(B \e$AF_\e(B \e$AHK\e(B \e$AH}\e(B \e$AK{\e(B \e$AJG\e(B \e$AWE\e(B \e$ANR\e(B \e$AP!\e(B \e$AR;\e(B \e$ATZ\e(B
1008 Key: n o p q r s t u v w x y z
1009
1010To input two-letter words, you have two ways. One way is to type 4
1011keys, two for the first Pinyin, two for the second Pinyin. For
1012instance, \"vsgo\" inputs \e$AVP9z\e(B. Another way is to type 3 keys: 2
1013initials of two letters, and quote ('). For instance, \"vg'\" also
1014inputs \e$AVP9z\e(B.
1015
1016To input three-letter words, you type 4 keys: initials of three
1017letters, and the last is quote ('). For instance, \"bjy'2\" inputs \e$A11\e(B
1018\e$A>)Q<\e(B (the last `2' is to select one of the candidates).
1019
1020To input words of more than three letters, you type 4 keys, initials
1021of the first three letters and the last letter. For instance,
1022\"bjdt\" inputs \e$A11>)5gJSL(\e(B.
1023
1024To input symbols and punctuations, type `/' followed by one of `a' to
1025`z', then select one of the candidates."))
 ;; Insert the key bindings followed by `nil nil nil nil)'; the
 ;; closing paren presumably ends the `quail-define-package' form
 ;; that the caller started.
1026 (insert " '((\"\C-?\" . quail-delete-last-char)
1027 (\".\" . quail-next-translation)
1028 (\"[\" . quail-next-translation)
1029 (\",\" . quail-prev-translation)
1030 (\"]\" . quail-prev-translation))
1031 nil nil nil nil)\n\n")
 ;; Write one rule per entry of DIC, then close the rules form.
1032 (insert "(quail-define-rules\n")
1033 (dolist (elt dic)
1034 (insert (format "(%S %S)\n" (car elt) (cdr elt))))
1035 (insert ")\n")))
1036
a4c4011b
AC
1037;; Generate the code for a Quail package in the current buffer from a
1038;; CTLau or CTLau-b5 dictionary in the buffer DICBUF. The input
1039;; method name of the Quail package is NAME, and the title string is
1040;; TITLE. DESCRIPTION is the string shown by describe-input-method.
1041
(defun ctlau-converter (dicbuf name title description)
  "Append Quail rules built from the CTLau dictionary in DICBUF.
All text is appended to the current buffer.  DESCRIPTION is inserted
first, printed as a Lisp string, followed by the key-binding
arguments and a `quail-define-rules' form holding one rule per
dictionary line.  NAME and TITLE are accepted so that all converters
share one calling convention; they are not used here.

The dictionary proper is the part of DICBUF that starts on the line
after the first line beginning with \"#<hr>\" and ends just before
the last line beginning with \"<hr>\".  An error is signaled when
either marker is missing."
  (goto-char (point-max))
  (insert (format "%S\n" description))
  (insert " '((\"\C-?\" . quail-delete-last-char)
 (\".\" . quail-next-translation)
 (\">\" . quail-next-translation)
 (\",\" . quail-prev-translation)
 (\"<\" . quail-prev-translation))
 nil nil nil nil)\n\n")
  (insert "(quail-define-rules\n")
  (let ((pos (point))
        dicbuf-start dicbuf-end key-start)
    ;; Find the dictionary, which starts below a horizontal rule and
    ;; ends at the second to last line in the HTML file.
    ;; `with-current-buffer' replaces the obsolete combination of
    ;; `save-excursion' and `set-buffer'; point in the output buffer
    ;; is not moved while DICBUF is current.
    (with-current-buffer dicbuf
      (goto-char (point-min))
      (re-search-forward "^#<hr>")
      (forward-line 1)
      (setq dicbuf-start (point))
      (goto-char (point-max))
      (re-search-backward "^<hr>")
      (setq dicbuf-end (point)))
    (insert-buffer-substring-no-properties dicbuf dicbuf-start dicbuf-end)
    ;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as
    ;; hollow boxes when the original characters in CTLau.html from
    ;; which the file is converted have no Big5 equivalent.  Go
    ;; through and delete them.
    (goto-char pos)
    (while (search-forward "\e$(0!{\e(B" nil t)
      (delete-char -1))
    ;; Rewrite every dictionary line as a rule ("key" "translations").
    ;; Uppercase keys need to be downcased, and a backslash at the
    ;; beginning of a key needs to be doubled so that the rule reads
    ;; back as a string containing one backslash.
    (goto-char pos)
    (while (not (eobp))
      (insert "(\"")
      (if (char-equal (following-char) ?\\)
          (insert "\\"))
      (setq key-start (point))
      (skip-chars-forward "\\\\A-Z")
      (downcase-region key-start (point))
      (insert "\" \"")
      ;; Remove the single character that followed the key.
      (delete-char 1)
      (end-of-line)
      ;; Strip any carriage returns left at the end of the line.
      (while (= (preceding-char) ?\r)
        (delete-char -1))
      (insert "\")")
      (forward-line 1)))
  ;; Close the `quail-define-rules' form.
  (insert ")\n"))
1091
;; Convert the CTLau dictionary in DICBUF by calling `ctlau-converter'
;; with DICBUF, NAME and TITLE unchanged, plus the literal string below
;; as DESCRIPTION (the text that mentions GB characters).
1092(defun ctlau-gb-converter (dicbuf name title)
1093 (ctlau-converter dicbuf name title
1094"\e$A::WVJdHk!KAuN}OiJ=TARt!K\e(B
1095
1096 \e$AAuN}OiJ=TASoW"Rt7=08\e(B
1097 Sidney Lau's Cantonese transcription scheme as described in his book
1098 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
1099 This file was prepared by Fung Fung Lee (\e$A@n7c7e\e(B).
1100 Originally converted from CTCPS3.tit
1101 Last modified: June 2, 1993.
1102
4e7e1f03 1103 Some infrequent GB characters are accessed by typing \\, followed by
a4c4011b
AC
1104 the Cantonese romanization of the respective radical (\e$A2?JW\e(B)."))
1105
;; Convert the CTLau-b5 dictionary in DICBUF by calling
;; `ctlau-converter' with DICBUF, NAME and TITLE unchanged, plus the
;; literal string below as DESCRIPTION.
1106(defun ctlau-b5-converter (dicbuf name title)
1107 (ctlau-converter dicbuf name title
1108"\e$(0KH)tTT&,!(N,Tg>A*#Gn5x!(\e(B
1109
1110 \e$(0N,Tg>A*#GnM$0D5x'J7{\e(B
1111 Sidney Lau's Cantonese transcription scheme as described in his book
1112 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
1113 This file was prepared by Fung Fung Lee (\e$(0,XFS76\e(B).
1114 Originally converted from CTCPS3.tit
1115 Last modified: June 2, 1993.
1116
4e7e1f03 1117 Some infrequent characters are accessed by typing \\, followed by
a4c4011b
AC
1118 the Cantonese romanization of the respective radical (\e$(0?f5}\e(B)."))
1119
;; Tell the byte compiler that `dos-8+3-filename' is defined in
;; dos-fns.el. `miscdic-convert' calls it only when
;; `msdos-long-file-names' is bound and returns nil.
5cec3056 1120(declare-function dos-8+3-filename "dos-fns.el" (filename))
73e6adaa 1121
(defun miscdic-convert (filename &optional dirname)
  "Convert a dictionary file FILENAME into a Quail package.
FILENAME is compared with the dictionary file name of every entry of
`quail-misc-package-ext-info'.  For each entry that matches and whose
Quail file is older than FILENAME, the Quail file is regenerated by
calling the entry's converter function.
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved.
Signal an error if FILENAME is not readable."
  (interactive "FInput method dictionary file: ")
  (or (file-readable-p filename)
      (error "%s does not exist or is not readable" filename))
  (dolist (slot quail-misc-package-ext-info)
    (let ((dicfile (nth 2 slot))
          (quailfile (nth 4 slot)))
      ;; NOTE(review): DICFILE is used as a regexp here, so a `.' in it
      ;; matches any character; confirm no entry relies on that before
      ;; quoting it.
      (when (and (or (string-match dicfile filename)
                     ;; MS-DOS filesystem truncates file names to 8+3
                     ;; limits, so "cangjie-table.cns" becomes
                     ;; "cangjie-.cns", and the above string-match
                     ;; fails.  Give DOS users a chance...
                     (and (fboundp 'msdos-long-file-names)
                          (not (msdos-long-file-names))
                          (string-match (dos-8+3-filename dicfile) filename)))
                 (if (file-newer-than-file-p
                      filename (expand-file-name quailfile dirname))
                     t
                   (message "%s is up to date" quailfile)
                   nil))
        (let* ((name (car slot))
               (title (nth 1 slot))
               (coding (nth 3 slot))
               (converter (nth 5 slot))
               (copyright (nth 6 slot))
               ;; Explicitly set eol format to `unix'.
               (coding-system-for-write
                (coding-system-change-eol-conversion coding 'unix)))
          (message "Converting %s to %s..." dicfile quailfile)
          (with-temp-file (expand-file-name quailfile dirname)
            (insert (format ";; Quail package `%s' -*- coding:%s; " name coding))
            (insert "byte-compile-disable-print-circle:t; -*-\n")
            (insert ";; Generated by the command `miscdic-convert'\n")
            (insert ";; Date: " (current-time-string) "\n")
            (insert ";; Source dictionary file: " dicfile "\n")
            (insert ";; Copyright notice of the source file\n")
            (insert ";;------------------------------------------------------\n")
            (insert copyright "\n")
            (insert ";;------------------------------------------------------\n")
            (insert "\n")
            (insert ";;; Code:\n\n")
            (insert "(require 'quail)\n")
            (insert "(quail-define-package \"" name "\" \""
                    (if (eq coding 'big5) "Chinese-BIG5"
                      (if (eq coding 'iso-2022-cn-ext) "Chinese-CNS"
                        "Chinese-GB"))
                    "\" \"" title "\" t\n")
            (let* ((coding-system-for-read
                    (coding-system-change-eol-conversion coding 'unix))
                   (dicbuf (find-file-noselect filename)))
              ;; Kill the dictionary buffer even when the converter
              ;; signals an error, so that it is not left behind.
              (unwind-protect
                  (funcall converter dicbuf name title)
                (kill-buffer dicbuf))))
          (message "Converting %s to %s...done" dicfile quailfile))))))
1185
(defun batch-miscdic-convert ()
  "Run `miscdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
If there's an argument \"-dir\", the next argument specifies a directory
to store generated Quail packages.
When an argument names a directory, every non-directory file in it
is converted.  Emacs is killed with exit status 0 afterwards."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-miscdic-convert' should be used only with -batch"))
  (let ((dir default-directory)
        filename)
    (while command-line-args-left
      (when (string= (car command-line-args-left) "-dir")
        (setq command-line-args-left (cdr command-line-args-left))
        (or command-line-args-left
            (error "`-dir' must be followed by a directory name"))
        (setq dir (car command-line-args-left)
              command-line-args-left (cdr command-line-args-left)))
      (setq filename (car command-line-args-left)
            command-line-args-left (cdr command-line-args-left))
      ;; FILENAME is nil when "-dir DIR" ended the argument list; there
      ;; is nothing to convert then, and `file-directory-p' would
      ;; signal a type error on nil.
      (when filename
        (if (file-directory-p filename)
            (dolist (file (directory-files filename t nil t))
              (or (file-directory-p file)
                  (miscdic-convert file dir)))
          (miscdic-convert filename dir)))))
  (kill-emacs 0))
1211
5cdaf2a5
KH
1212;; Local Variables:
1213;; coding: iso-2022-7bit
1214;; End:
60370d40 1215
cbee283d 1216;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3
60370d40 1217;;; titdic-cnv.el ends here