Update FSF's address.
[bpt/emacs.git] / lisp / international / titdic-cnv.el
CommitLineData
c7211fed 1;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*-
4ed46869 2
2fd125a3
KH
3;; Copyright (C) 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
4;; Copyright (C) 1995, 1997, 1998, 2000, 2001, 2002
5;; National Institute of Advanced Industrial Science and Technology (AIST)
6;; Registration Number H14PRO021
4ed46869
KH
7
8;; Keywords: Quail, TIT, cxterm
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
369314dc 23;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
24;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25;; Boston, MA 02110-1301, USA.
4ed46869 26
60370d40 27;;; Commentary:
4ed46869 28
49ed466f 29;; Convert cxterm dictionary (of TIT format) to quail-package.
4ed46869
KH
30;;
31;; Usage (within Emacs):
49ed466f 32;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR>
4ed46869 33;; Usage (from shell):
49ed466f 34;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\
4ed46869
KH
35;; [-dir DIR] [DIR | FILE] ...
36;;
37;; When you run titdic-convert within Emacs, you have a chance to
38;; modify arguments of `quail-define-package' before saving the
39;; converted file. For instance, you are likely to modify TITLE,
40;; DOCSTRING, and KEY-BINDINGS.
41
49ed466f 42;; Cxterm dictionary file (*.tit) is a line-oriented text (English,
4ed46869
KH
43;; Chinese, Japanese, and Korean) file. The whole file consists of
44;; two parts, the definition part (`header' hereafter) followed by
45;; the dictionary part (`body' hereafter). All lines beginning with
46;; '#' are ignored.
47;;
48;; Each line in the header part has two fields, KEY and VALUE. These
49;; fields are separated by one or more white characters.
50;;
51;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
52;; These fields are separated by one or more white characters.
53;;
54;; See the manual page of `tit2cit' of cxterm distribution for more
55;; detail.
c063e381 56;;
b138056a 57;; Near the end of this file, we also have a few other tools to convert
c063e381 58;; miscellaneous dictionaries.
4ed46869
KH
59
60;;; Code:
61
62(require 'quail)
63
(defvar tit-encode-list
  (list (list "GB" 'euc-china "Chinese-GB")
        (list "BIG5" 'cn-big5 "Chinese-BIG5")
        (list "JIS" 'euc-japan "Japanese")
        (list "KS" 'euc-kr "Korean"))
  "Alist of values of the TIT header key \"ENCODE:\".
Each element has the form (ENCODE CODING-SYSTEM LANGUAGE-ENVIRONMENT),
where ENCODE is the string found in the dictionary, CODING-SYSTEM is
the Emacs coding system used to decode the dictionary, and
LANGUAGE-ENVIRONMENT is the language environment name written into the
generated `quail-define-package' call.")
71
4558e816
KH
72;; Alist of input method names and the corresponding title and extra
73;; docstring. For each input method generated from a TIT dictionary,
74;; a docstring is automatically generated from the comments in the
75;; dictionary. The extra docstring in this alist is to add more
76;; information.
77;; The command describe-input-method shows the automatically generated
c7211fed 78;; docstring, then an extra docstring while replacing the form \<VAR>
4558e816
KH
79;; by the value of variable VAR. For instance, the form
80;; \<quail-translation-docstring> is replaced by a description about
81;; how to select a translation from a list of candidates.
82
6b1e079c
KH
;; Each element has the form (INPUT-METHOD-NAME TITLE [EXTRA-DOCSTRING]).
;; `tit-process-header' looks an input method up by name, uses TITLE as
;; the title of the generated package, and appends EXTRA-DOCSTRING (when
;; present) to the docstring collected from the dictionary.
(defvar quail-cxterm-package-ext-info
  '(("chinese-4corner" "\e$(0(?-F\e(B")
    ("chinese-array30" "\e$(0#R#O\e(B")
    ("chinese-ccdospy" "\e$AKuF4\e(B"
     "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312').

Pinyin is the standard Roman transliteration method for Chinese.
For the detail of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'. The
difference is that you type a single key for these Pinyin spelling.
 Pinyin: zh en eng ang ch an ao ai ong sh ing yu(\e$A(9\e(B)
 keyseq: a f g h i j k l s u y v
For example:
 Chinese: \e$A0!\e(B \e$A9{\e(B \e$AVP\e(B \e$AND\e(B \e$A9b\e(B \e$ASq\e(B \e$AH+\e(B
 Pinyin: a guo zhong wen guang yu quan
 Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6

\\<quail-translation-docstring>

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-ecdict" "\e$(05CKH\e(B"
"In this input method, you enter a Chinese (Big5) character or word
by typing the corresponding English word. For example, if you type
\"computer\", \"\e$(0IZH+\e(B\" is input.

\\<quail-translation-docstring>")

    ("chinese-etzy" "\e$(06/0D\e(B"
"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of phonetic symbol. One to three Zhuyin symbols
compose one Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 1, 2, 3, or 4 specifying a tone (SPC:\e$(0?v(N\e(B, 1:\e$(0M=Vy\e(B, 2:\e$(0Dm(N\e(B, 3: \e$(0&9Vy\e(B,
4:\e$(0(+Vy\e(B).

\\<quail-translation-docstring>")

    ("chinese-punct-b5" "\e$(0O:\e(BB"
     "Input method for Chinese punctuations and symbols of Big5
\(`chinese-big5-1' and `chinese-big5-2').")

    ("chinese-punct" "\e$A1j\e(BG"
     "Input method for Chinese punctuations and symbols of GB2312
\(`chinese-gb2312').")

    ("chinese-py-b5" "\e$(03<\e(BB"
     "Pinyin base input method for Chinese Big5 characters
\(`chinese-big5-1', `chinese-big5-2').

This input method works almost the same way as `chinese-py' (which
see).

This input method supports only Han characters. The more convenient
method is `chinese-py-punct-b5', which is the combination of this
method and `chinese-punct-b5' and which supports both Han characters
and punctuation/symbols.

For double-width Big5 characters corresponding to ASCII, use the input
method `chinese-qj-b5'.

The input method `chinese-py' and `chinese-tonepy' are also Pinyin
based, but for the character set GB2312 (`chinese-gb2312').")

    ("chinese-qj-b5" "\e$(0)A\e(BB")

    ("chinese-qj" "\e$AH+\e(BG")

    ("chinese-sw" "\e$AJWN2\e(B"
"Radical base input method for Chinese charset GB2312 (`chinese-gb2312').

In this input method, you enter a Chinese character by typing two
keys. The first key corresponds to the first (\e$AJW\e(B) radical, the second
key corresponds to the last (\e$AN2\e(B) radical. The correspondence of keys
and radicals is as below:

 first radical:
 a b c d e f g h i j k l m n o p q r s t u v w x y z
 \e$APD\e(B \e$AZ"\e(B \e$AJ,\e(B \e$AX<\e(B \e$A;p\e(B \e$A?Z\e(B \e$A^P\e(B \e$Ac_\e(B \e$AZ%\e(B \e$A\3\e(B \e$AXi\e(B \e$AD>\e(B \e$Alj\e(B \e$Ab;\e(B \e$ATB\e(B \e$Afy\e(B \e$AJ/\e(B \e$AMu\e(B \e$A0K\e(B \e$AX/\e(B \e$AHU\e(B \e$AeA\e(B \e$Aak\e(B \e$AVq\e(B \e$AR;\e(B \e$AHK\e(B
 last radical:
 a b c d e f g h i j k l m n o p q r s t u v w x y z
 \e$ASV\e(B \e$AI=\e(B \e$AMA\e(B \e$A56\e(B \e$AZb\e(B \e$A?Z\e(B \e$ARB\e(B \e$Aqb\e(B \e$A4s\e(B \e$A6!\e(B \e$A[L\e(B \e$Ala\e(B \e$AJ.\e(B \e$A4u\e(B \e$AXg\e(B \e$ACE\e(B \e$A=q\e(B \e$AX-\e(B \e$AE.\e(B \e$ARR\e(B \e$A`m\e(B \e$AP!\e(B \e$A3'\e(B \e$A3f\e(B \e$A_.\e(B \e$A27\e(B

\\<quail-translation-docstring>")

    ("chinese-tonepy" "\e$A5wF4\e(B"
     "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
For the details of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'. The
difference is that you must type 1..5 after each Pinyin spelling to
specify a tone (1:\e$ARuF=\e(B, 2:\e$AQtF=\e(B, 3:\e$AIOIy\e(B, 4:\e$AOBIy\e(B, 5:\e$AGaIy\e(B).

\\<quail-translation-docstring>

For instance, to input \e$ADc\e(B, you type \"n i 3 3\", the first \"n i\" is
a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
the third character from the candidate list.

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-zozy" "\e$(0I\0D\e(B"
"Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of a phonetic symbol. One to three Zhuyin symbols
compose a Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 6, 3, 4, or 7 specifying a tone (SPC:\e$(0?v(N\e(B, 6:\e$(0Dm(N\e(B, 3:\e$(0&9Vy\e(B, 4:\e$(0(+Vy\e(B,
7:\e$(0M=Vy\e(B).

\\<quail-translation-docstring>")))
4ed46869
KH
208
(defsubst tit-read-key-value ()
  "Return the value field that starts at point, or nil if there is none.
The field is the run of characters up to the next space, tab, carriage
return or newline.  It is handed to the Lisp reader as the contents of
a string literal, so backslash escapes in it are interpreted."
  (when (looking-at "[^ \t\r\n]+")
    (let ((literal (format "\"%s\"" (match-string 0))))
      (car (read-from-string literal)))))
213
(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name for the TIT dictionary FILENAME.
The directory part and the extension of FILENAME are dropped and
\".el\" is appended; for instance, \".../ZOZY.tit\" gives \"ZOZY.el\".
The result is expanded relative to DIRNAME, or to `default-directory'
if DIRNAME is nil or omitted."
  ;; Strip the real extension rather than a fixed four characters, so
  ;; that short names and names without \".tit\" are handled sanely.
  (expand-file-name
   (concat (file-name-sans-extension (file-name-nondirectory filename))
           ".el")
   dirname))
220
(defvar tit-dictionary t
  "Non-nil while converting a dictionary, nil for a phrase dictionary.
`tit-process-header' sets this to t on a BEGINDICTIONARY line and to
nil on a BEGINPHRASE line.")

(defvar tit-encode nil
  "Value of the \"ENCODE:\" key of the TIT dictionary being converted.
`titdic-convert' sets this, falling back to `tit-default-encode'.")

(defvar tit-default-encode "GB"
  "Value used for `tit-encode' when a dictionary has no \"ENCODE:\" key.")
225
(defun tit-generate-key-bindings (keys function-symbol)
  "Print elements of the KEY-BINDINGS arg of `quail-define-package'.
For each character in the string KEYS, print to `standard-output' a
cons of the form (\"KEY\" . FUNCTION-SYMBOL) so that the key invokes
FUNCTION-SYMBOL.  Elements are separated by a newline and a space.

A control character is printed (as \"\\C-X\") only if
`quail-translation-keymap' binds it to
`quail-execute-non-quail-command'.  DEL is printed as \"\\C-?\", and
characters above 127 are skipped."
  (let ((first t)
        key)
    (dotimes (i (length keys))
      (or first (princ "\n "))
      (setq key (aref keys i))
      (when (if (< key ?\s)
                (eq (lookup-key quail-translation-keymap
                                (char-to-string key))
                    'quail-execute-non-quail-command)
              (<= key 127))
        (princ (cons (cond ((< key ?\s)
                            ;; Control-backslash must be written as
                            ;; "\C-\\" to stay readable.
                            (let ((base (+ key ?@)))
                              (if (= base ?\\)
                                  "\"\\C-\\\\\""
                                (format "\"\\C-%c\"" base))))
                           ((< key 127)
                            ;; `prin1-to-string' escapes `\"' and `\\',
                            ;; which a plain \"%c\" would emit raw and
                            ;; thereby corrupt the generated file.
                            (prin1-to-string (char-to-string key)))
                           (t "\"\\C-?\""))
                     function-symbol))
        (setq first nil)))))
248
(defun tit-process-header (filename)
  "Convert the header of a TIT dictionary into Quail package code.
Scan the accessible portion of the current buffer from its beginning
and print to `standard-output' a file preamble, the original header
lines as comments, and a `quail-define-package' call whose arguments
are derived from the header keywords.  FILENAME is the name of the
dictionary file; the input method name is \"chinese-\" followed by the
downcased base name of FILENAME.

As a side effect, set `tit-dictionary' according to whether the header
contains a BEGINDICTIONARY or a BEGINPHRASE line."
  (message "Processing header part...")
  (goto-char (point-min))

  ;; First, generate the header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
                  "chinese-"
                  ;; Drop the real extension instead of assuming a
                  ;; four-character \".tit\" suffix.
                  (file-name-sans-extension
                   (downcase (file-name-nondirectory filename)))))
        ;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package) (princ "' -*- coding:iso-2022-7bit; -*-\n")
    (princ ";; Generated by the command `titdic-convert'\n;;\tDate: ")
    (princ (current-time-string))
    (princ "\n;;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n;;; Comment:\n\n")
    (princ ";; Byte-compile this file again after any modification.\n\n")
    (princ ";;; Start of the header of original TIT dictionary.\n\n")

    ;; Dispatch on the first character of each line, then on the keyword.
    (while (not (eobp))
      (let ((ch (following-char))
            (pos (point)))
        (cond ((= ch ?C)                ; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((pos (match-end 0)))
                        (end-of-line)
                        (setq tit-comments
                              (cons (buffer-substring pos (point))
                                    tit-comments))))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      ;; A PROMPT line without a value yields nil; keep
                      ;; the prompt a string so later code can use it.
                      (setq tit-prompt (or (tit-read-key-value) ""))
                      ;; Some TIT dictionaries that are encoded by
                      ;; euc-china contain an invalid character at the
                      ;; tail.  Only look at it if there is one.
                      (when (> (length tit-prompt) 0)
                        (let* ((last (aref tit-prompt
                                           (1- (length tit-prompt))))
                               (split (split-char last)))
                          (if (or (eq (nth 1 split) 32)
                                  (eq (nth 2 split) 32))
                              (setq tit-prompt
                                    (substring tit-prompt 0 -1))))))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-dictionary t))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-dictionary nil))))
              ((= ch ?K)                ; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; Decode a backslash-digits escape in the key.
                        (if (string-match "\\\\[0-9]+" key-char)
                            (setq key-char
                                  (car (read-from-string
                                        (format "\"%s\"" key-char)))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt)))))))
        ;; Echo the original header line as a comment.
        (end-of-line)
        (princ ";; ")
        (princ (buffer-substring pos (point)))
        (princ "\n")
        (forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (nth 1 (assoc package quail-cxterm-package-ext-info))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      (princ (or title
                 (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
                     (substring tit-prompt (match-beginning 1) (match-end 1))
                   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    (if tit-keyprompt
        (progn
          (princ " '(")
          (while tit-keyprompt
            (princ " ")
            (princ (format "(%d . \"%s\")\n"
                           (string-to-char (car (car tit-keyprompt)))
                           (cdr (car tit-keyprompt))))
            (setq tit-keyprompt (cdr tit-keyprompt)))
          (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (let ((doc (concat tit-prompt "\n"))
          (comments (if tit-comments
                        (mapconcat 'identity (nreverse tit-comments) "\n")))
          (doc-ext (nth 2 (assoc package quail-cxterm-package-ext-info))))
      (if comments
          (setq doc (concat doc "\n" comments "\n")))
      (if doc-ext
          (setq doc (concat doc "\n" doc-ext "\n")))
      (prin1 doc)
      (terpri))

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    ;; NOTE(review): the GUIDANCE loop above has already consumed
    ;; `tit-keyprompt', so it is nil here and " nil nil)" is always
    ;; printed.  The output is kept as is; confirm the intended values
    ;; of KBD-TRANSLATE and SHOW-LAYOUT before changing this.
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
395
(defsubst tit-flush-translations (key translations)
  "Print one Quail translation rule for KEY to `standard-output'.
Each backslash followed by three digits in KEY is first replaced by
the character the Lisp reader produces for that escape.  The rule is
printed as the list (KEY TRANSLATIONS) followed by a newline.  When
`tit-dictionary' is nil, TRANSLATIONS is a list that is reversed and
converted to a vector; otherwise it is printed unchanged."
  (let ((escape-re "\\\\[0-9][0-9][0-9]")
        (from 0)
        (decoded ""))
    (when (string-match escape-re key)
      ;; Copy the text between escapes verbatim and decode each escape.
      (while (string-match escape-re key from)
        (setq decoded (concat decoded
                              (substring key from (match-beginning 0))
                              (car (read-from-string
                                    (concat "\"" (match-string 0 key) "\""))))
              from (match-end 0)))
      (setq key (concat decoded (substring key from)))))
  (prin1 (list key
               (if tit-dictionary
                   translations
                 (vconcat (nreverse translations)))))
  (princ "\n"))
4ed46869
KH
413
(defun tit-process-body ()
  "Convert the body of a TIT dictionary into a `quail-define-rules' call.
Read entries from point to the end of the current buffer and print the
form to `standard-output'.  Lines starting with `#', empty lines and
entries without translations are skipped.  Consecutive lines with the
same key sequence are merged into one rule, which is printed by
`tit-flush-translations'.  When `tit-dictionary' is non-nil, only the
first field after the key is taken from each line and the fields are
concatenated into one string; otherwise every field up to a `#' is
collected into a list."
  (message "Formatting translation rules...")
  (let ((prev-key "")
        (translations nil)
        ch keyseq start)
    (princ "(quail-define-rules\n")
    (while (not (eobp))
      (setq ch (following-char))
      (cond
       ;; Comment line or empty line.
       ((or (= ch ?#) (= ch ?\n))
        (forward-line 1))
       (t
        (setq start (point))
        (skip-chars-forward "^ \t\n")
        (setq keyseq (buffer-substring start (point)))
        (skip-chars-forward " \t")
        (setq ch (following-char))
        (cond
         ;; This entry contains no translations.  Let's ignore it.
         ((or (= ch ?#) (= ch ?\n))
          (forward-line 1))
         (t
          ;; A new key sequence: emit what was gathered for the old one.
          (unless (string= keyseq prev-key)
            (when translations
              (tit-flush-translations prev-key translations))
            (setq translations nil
                  prev-key keyseq))
          (cond
           (tit-dictionary
            (setq start (point))
            (skip-chars-forward "^ \t#\n")
            (setq translations
                  (concat translations (buffer-substring start (point)))))
           (t
            (while (not (eolp))
              (setq start (point))
              (skip-chars-forward "^ \t\n")
              (push (buffer-substring start (point)) translations)
              (skip-chars-forward " \t")
              ;; The rest of the line is a comment.
              (when (= (following-char) ?#)
                (end-of-line)))))
          (forward-line 1))))))

    (when translations
      (tit-flush-translations prev-key translations))
    (princ ")\n")))
4ed46869
KH
463
;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Convert a TIT dictionary of FILENAME into a Quail package.
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved."
  (interactive "FTIT dictionary file: ")
  (let ((coding-system-for-write 'iso-2022-7bit))
    (with-temp-file (tit-make-quail-package-file-name filename dirname)
      (set-buffer-file-coding-system 'iso-2022-7bit)
      ;; Everything printed below goes into the package file buffer.
      (let ((standard-output (current-buffer)))
        (with-temp-buffer
          ;; Read the dictionary as raw bytes.
          (set-buffer-multibyte nil)
          (let ((coding-system-for-read 'no-conversion))
            (insert-file-contents (expand-file-name filename)))

          ;; Decode the buffer contents from the encoding specified by
          ;; the value of the key "ENCODE:".
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary doesn't have body part"))
          (let ((header-end (point)))
            (goto-char (point-min))
            (setq tit-encode
                  (if (re-search-forward "^ENCODE:[ \t]*" header-end t)
                      (progn
                        (goto-char (match-end 0))
                        (tit-read-key-value))
                    tit-default-encode))
            (let ((entry (assoc tit-encode tit-encode-list)))
              (or entry
                  (error "Invalid ENCODE: value in TIT dictionary"))
              (message "Decoding with coding system %s..." (nth 1 entry))
              (goto-char (point-min))
              (decode-coding-region (point-min) (point-max) (nth 1 entry))))

          ;; Move point to the starting position of the body part.
          (goto-char (point-min))
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary can't be decoded correctly"))

          ;; Process the header part in multibyte mode.
          (with-current-buffer standard-output
            (set-buffer-multibyte t))
          (set-buffer-multibyte t)
          (forward-line 1)
          (narrow-to-region (point-min) (point))
          (tit-process-header filename)
          (widen)

          ;; Process the body part.  For speed, we turn off the
          ;; multibyte facility.
          (with-current-buffer standard-output
            (set-buffer-multibyte nil))
          (set-buffer-multibyte nil)
          (tit-process-body))))))
4ed46869
KH
518
;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
 generate Quail package file \"xxx.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".
A dictionary is converted only if it is newer than its Quail package
file, unless optional argument FORCE is non-nil."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (or noninteractive
      (error "`batch-titdic-convert' should be used only with -batch"))
  (cond
   ((string= (car command-line-args-left) "-h")
    (message "To convert XXX.tit and YYY.tit into xxx.el and yyy.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
    (message "To convert XXX.tit into DIR/xxx.el:")
    (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
   (t
    (let (targetdir)
      ;; An optional leading "-dir DIR" selects the output directory.
      (when (string= (car command-line-args-left) "-dir")
        (setq targetdir (cadr command-line-args-left)
              command-line-args-left (cddr command-line-args-left)))
      ;; Each remaining argument is a dictionary file or a directory
      ;; whose *.tit files are all converted.
      (while command-line-args-left
        (let ((arg (expand-file-name (car command-line-args-left))))
          (dolist (candidate
                   (if (file-directory-p arg)
                       (progn
                         (message "Converting all tit files in the directory %s" arg)
                         (directory-files arg t "\\.tit$"))
                     (list arg)))
            (let ((file (expand-file-name candidate)))
              (if (or force
                      (file-newer-than-file-p
                       file (tit-make-quail-package-file-name file targetdir)))
                  (progn
                    (message "Converting %s to quail-package..." file)
                    (titdic-convert file targetdir))))))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el"))))
  (kill-emacs 0))
561
c063e381
KH
562\f
563;;; Converter of miscellaneous dictionaries other than TIT format.
564
565;; Alist of input method names and the corresponding information.
566;; Each element has this form:
567;; (INPUT-METHOD-NAME ;; Name of the input method.
a1506d29 568;; INPUT-METHOD-TITLE ;; Title string of the input method
c063e381
KH
569;; DICFILE ;; Name of the source dictionary file.
570;; CODING ;; Coding system of the dictionary file.
571;; QUAILFILE ;; Name of the Quail package file.
572;; CONVERTER ;; Function to generate the Quail package.
573;; COPYRIGHT-NOTICE ;; Copyright notice of the source dictionary.
574;; )
575
;; The copyright notices shared by several entries are written once
;; below and filled in with the dictionary file name where needed.
(defvar quail-misc-package-ext-info
  (let ((wittern-notice "\
;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
;; #
;; # Permission to copy and distribute both modified and
;; # unmodified versions is granted without royalty provided
;; # this notice is preserved.")
        ;; Template with one %s: the dictionary file name.
        (cce-notice "\
;; \"%s\" is included in a free package called CCE. It is
;; available at:
;; http://ftp.debian.org/debian/dists/potato/main
;; /source/utils/cce_0.36.orig.tar.gz
;; This package contains the following copyright notice.
;;
;;
;; Copyright (C) 1999, Rui He, herui@cs.duke.edu
;;
;;
;; CCE(Console Chinese Environment) 0.32
;;
;; CCE is free software; you can redistribute it and/or modify it under the
;; terms of the GNU General Public License as published by the Free Software
;; Foundation; either version 1, or (at your option) any later version.
;;
;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
;; details.
;;
;; You should have received a copy of the GNU General Public License along with
;; CCE; see the file COPYING. If not, write to the Free Software Foundation,
;; 675 Mass Ave, Cambridge, MA 02139, USA.")
        ;; Template with two %s: both are the dictionary file name.
        (ctlau-notice "\
;; \"%s\" is available at:
;;
;; http://umunhum.stanford.edu/~lee/chicomp/%s
;;
;; It contains the following copyright notice:
;;
;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu)
;; #
;; # This program is free software; you can redistribute it and/or
;; # modify it under the terms of the GNU General Public License
;; # as published by the Free Software Foundation; either version 2
;; # of the License, or any later version.
;; #
;; # This program is distributed in the hope that it will be useful,
;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; # GNU General Public License for more details.
;; #
;; # You should have received a copy of the GNU General Public License
;; # along with this program; if not, write to the Free Software Foundation,
;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA."))
    (list
     (list "chinese-b5-tsangchi" "\e$(06A\e(BB"
           "cangjie-table.b5" 'big5 "tsang-b5.el"
           'tsang-b5-converter
           wittern-notice)

     (list "chinese-b5-quick" "\e$(0X|\e(BB"
           "cangjie-table.b5" 'big5 "quick-b5.el"
           'quick-b5-converter
           wittern-notice)

     (list "chinese-cns-tsangchi" "\e$(GT?\e(BC"
           "cangjie-table.cns" 'iso-2022-cn-ext "tsang-cns.el"
           'tsang-cns-converter
           wittern-notice)

     (list "chinese-cns-quick" "\e$(Gv|\e(BC"
           "cangjie-table.cns" 'iso-2022-cn-ext "quick-cns.el"
           'quick-cns-converter
           wittern-notice)

     (list "chinese-py" "\e$AF4\e(BG"
           "pinyin.map" 'cn-gb-2312 "PY.el"
           'py-converter
           (format cce-notice "pinyin.map"))

     (list "chinese-ziranma" "\e$AWTH;\e(B"
           "ziranma.cin" 'cn-gb-2312 "ZIRANMA.el"
           'ziranma-converter
           (format cce-notice "ziranma.cin"))

     (list "chinese-ctlau" "\e$AAuTA\e(B"
           "CTLau.html" 'cn-gb-2312 "CTLau.el"
           'ctlau-gb-converter
           (format ctlau-notice "CTLau.html" "CTLau.html"))

     (list "chinese-ctlaub" "\e$(0N,Gn\e(B"
           "CTLau-b5.html" 'big5 "CTLau-b5.el"
           'ctlau-b5-converter
           (format ctlau-notice "CTLau-b5.html" "CTLau-b5.html"))))
  "Alist of input method names and the corresponding information.
Each element has the form
  (INPUT-METHOD-NAME INPUT-METHOD-TITLE DICFILE CODING QUAILFILE
   CONVERTER COPYRIGHT-NOTICE)
where DICFILE is the name of the source dictionary file, CODING is the
coding system of that file, QUAILFILE is the name of the Quail package
file, CONVERTER is the function that generates the Quail package, and
COPYRIGHT-NOTICE is the copyright notice of the source dictionary.")
727
728;; Generate a code of a Quail package in the current buffer from Tsang
729;; dictionary in the buffer DICBUF. The input method name of the
730;; Quail package is NAME, and the title string is TITLE.
731
732;; If TSANG-P is non-nil, generate \e$(06AQo\e(B input method. Otherwise
733;; generate \e$(0X|/y\e(B (simple version of \e$(06AQo\e(B). If BIG5-P is non-nil, the
734;; input method is for inputting Big5 characters. Otherwise the input
735;; method is for inputting CNS characters.
736
;; Append the remainder of a Quail package definition to the end of the
;; current buffer, using the dictionary held in buffer DICBUF.
;;
;; Steps, in order:
;;   1. Pick FULLTITLE from the TSANG-P / BIG5-P combination.
;;   2. Insert the package docstring (FULLTITLE is substituted twice).
;;   3. Insert the translation-key alist and the trailing `nil nil)'.
;;   4. Insert `(quail-define-rules', one rule per dictionary key, the
;;      punctuation rules, and the closing parenthesis.
;;
;; When TSANG-P is nil and a key has more than one letter, only its first
;; and last letters are kept.  NAME and TITLE are accepted but are not
;; referenced anywhere in this body.
;;
;; NOTE(review): DICBUF itself is modified here (doubled CRs at line ends
;; are removed), and `search-forward' signals an error if "A440" is absent.
737(defun tsang-quick-converter (dicbuf name title tsang-p big5-p)
738 (let ((fulltitle (if tsang-p (if big5-p "\e$(06AQo\e(B" "\e$(GT?on\e(B")
739 (if big5-p "\e$(0X|/y\e(B" "\e$(Gv|Mx\e(B")))
740 dic)
741 (goto-char (point-max))
742 (if big5-p
743 (insert (format "\"\e$(0&d'GTT&,!J\e(B%s\e$(0!K\e(BBIG5
744
745 \e$(0KHM$\e(B%s\e$(0TT&,WoOu\e(B
746
747 [Q \e$(0'D\e(B] [W \e$(0(q\e(B] [E \e$(0'V\e(B] [R \e$(0&H\e(B] [T \e$(0'>\e(B] [Y \e$(0&4\e(B] [U \e$(0&U\e(B] [I \e$(0'B\e(B] [O \e$(0&*\e(B] [P \e$(0'A\e(B]
748
749 [A \e$(0'K\e(B] [S \e$(0&T\e(B] [D \e$(0'N\e(B] [F \e$(0'W\e(B] [G \e$(0&I\e(B] [H \e$(0*M\e(B] [J \e$(0&3\e(B] [L \e$(0&d\e(B]
a1506d29 750
c063e381
KH
751 [Z ] [X \e$(0[E\e(B] [C \e$(01[\e(B] [V \e$(0&M\e(B] [B \e$(0'M\e(B] [N \e$(0&_\e(B] [M \e$(0&"\e(B]
752
753\\\\<quail-translation-docstring>\"\n"
754 fulltitle fulltitle))
755 (insert (format "\"\e$(GDcEFrSD+!J\e(B%s\e$(G!K\e(BCNS
756
757 \e$(GiGk#\e(B%s\e$(GrSD+uomu\e(B
758
759 [Q \e$(GEC\e(B] [W \e$(GFp\e(B] [E \e$(GEU\e(B] [R \e$(GDG\e(B] [T \e$(GE=\e(B] [Y \e$(GD3\e(B] [U \e$(GDT\e(B] [I \e$(GEA\e(B] [O \e$(GD)\e(B] [P \e$(GE@\e(B]
760
761 [A \e$(GEJ\e(B] [S \e$(GDS\e(B] [D \e$(GEM\e(B] [F \e$(GEV\e(B] [G \e$(GDH\e(B] [H \e$(GHL\e(B] [J \e$(GD2\e(B] [L \e$(GDc\e(B]
a1506d29
JB
762
763 [Z ] [X \e$(GyE\e(B] [C \e$(GOZ\e(B] [V \e$(GDL\e(B] [B \e$(GEL\e(B] [N \e$(GD^\e(B] [M \e$(GD!\e(B]
c063e381
KH
764
765\\\\<quail-translation-docstring>\"\n"
766 fulltitle fulltitle)))
;; Translation keys: "." is bound to `quail-next-translation-block' and
;; "," to `quail-prev-translation-block'.
767 (insert " '((\".\" . quail-next-translation-block)
768 (\",\" . quail-prev-translation-block))
769 nil nil)\n\n")
770 (insert "(quail-define-rules\n")
771 (save-excursion
772 (set-buffer dicbuf)
6ed24d2e
JR
773 ;; Handle double CR line ends, which result when checking out of
774 ;; CVS on MS-Windows.
775 (goto-char (point-min))
776 (while (re-search-forward "\r\r$" nil t)
777 (replace-match ""))
c063e381
KH
778 (goto-char (point-min))
779 (search-forward "A440")
780 (beginning-of-line)
;; TABLE maps each (downcased) key to the string of all characters seen
;; for that key, in dictionary order.
781 (let ((table (make-hash-table :test 'equal))
782 val)
783 (while (not (eobp))
;; The translation character is the one found after skipping 5
;; characters from the start of the line; the key is the run of
;; capital letters at the end of the line.
784 (forward-char 5)
785 (let ((trans (char-to-string (following-char)))
786 key slot)
787 (re-search-forward "[A-Z]+$" nil t)
788 (setq key (downcase
789 (if (or tsang-p
790 (<= (- (match-end 0) (match-beginning 0)) 1))
791 (match-string 0)
792 (string (char-after (match-beginning 0))
793 (char-after (1- (match-end 0)))))))
794 (setq val (gethash key table))
795 (if val (setq trans (concat val trans)))
796 (puthash key trans table)
797 (forward-line 1)))
798 (maphash #'(lambda (key val) (setq dic (cons (cons key val) dic)))
799 table)))
;; Emit the collected rules sorted by key.
800 (setq dic (sort dic (function (lambda (x y) (string< (car x ) (car y))))))
801 (dolist (elt dic)
802 (insert (format "(%S\t%S)\n" (car elt) (cdr elt))))
;; Punctuation rules.  Each entry is (KEY BIG5-STRING CNS-STRING); the rule
;; key is "z" followed by KEY, and BIG5-P selects which string is used.
803 (let ((punctuations '((";" "\e$(0!'!2!"!#!.!/\e(B" "\e$(G!'!2!"!#!.!/\e(B")
804 (":" "\e$(0!(!+!3!%!$!&!0!1\e(B" "\e$(G!(!+!3!%!$!&!0!1\e(B")
805 ("'" "\e$(0!e!d\e(B" "\e$(G!e!d\e(B")
806 ("\"" "\e$(0!g!f!h!i!q\e(B" "\e$(G!g!f!h!i!q\e(B")
807 ("\\" "\e$(0"`"b#M\e(B" "\e$(G"`"b#M\e(B")
808 ("|" "\e$(0!6!8!:"^\e(B" "\e$(G!6!8!:"^\e(B")
809 ("/" "\e$(0"_"a#L\e(B" "\e$(G"_"a#L\e(B")
810 ("?" "\e$(0!)!4\e(B" "\e$(G!)!4\e(B")
811 ("<" "\e$(0!R"6"A!T"H\e(B" "\e$(G!R"6"A!T"H\e(B")
812 (">" "\e$(0!S"7"B!U\e(B" "\e$(G!S"7"B!U\e(B")
813 ("[" "\e$(0!F!J!b!H!L!V!Z!X!\\e(B" "\e$(G!F!J!b!H!L!V!Z!X!\\e(B")
814 ("]" "\e$(0!G!K!c!I!M!W![!Y!]\e(B" "\e$(G!G!K!c!I!M!W![!Y!]\e(B")
815 ("{" "\e$(0!B!`!D\e(B " "\e$(G!B!`!D\e(B ")
816 ("}" "\e$(0!C!a!E\e(B" "\e$(G!C!a!E\e(B")
817 ("`" "\e$(0!j!k\e(B" "\e$(G!j!k\e(B")
818 ("~" "\e$(0"D"+",!<!=\e(B" "\e$(G"D"+",!<!=\e(B")
819 ("!" "\e$(0!*!5\e(B" "\e$(G!*!5\e(B")
820 ("@" "\e$(0"i"n\e(B" "\e$(G"i"n\e(B")
821 ("#" "\e$(0!l"-\e(B" "\e$(G!l"-\e(B")
822 ("$" "\e$(0"c"l\e(B" "\e$(G"c"l\e(B")
823 ("%" "\e$(0"h"m\e(B" "\e$(G"h"m\e(B")
824 ("&" "\e$(0!m".\e(B" "\e$(G!m".\e(B")
825 ("*" "\e$(0!n"/!o!w!x\e(B" "\e$(G!n"/!o!w!x\e(B")
826 ("(" "\e$(0!>!^!@\e(B" "\e$(G!>!^!@\e(B")
827 (")" "\e$(0!?!_!A\e(B" "\e$(G!?!_!A\e(B")
828 ("-" "\e$(0!7!9"#"$"1"@\e(B" "\e$(G!7!9"#"$"1"@\e(B")
829 ("_" "\e$(0"%"&\e(B" "\e$(G"%"&\e(B")
830 ("=" "\e$(0"8"C\e(B" "\e$(G"8"C\e(B")
831 ("+" "\e$(0"0"?\e(B" "\e$(G"0"?\e(B"))))
832 (dolist (elt punctuations)
833 (insert (format "(%S %S)\n" (concat "z" (car elt))
834 (if big5-p (nth 1 elt) (nth 2 elt))))))
835 (insert ")\n")))
836
(defun tsang-b5-converter (dicbuf name title)
  "Convert the dictionary in DICBUF with `tsang-quick-converter'.
TSANG-P and BIG5-P are both passed as t; NAME and TITLE are handed
through unchanged."
  (let ((tsang-p t)
        (big5-p t))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
839
(defun quick-b5-converter (dicbuf name title)
  "Convert the dictionary in DICBUF with `tsang-quick-converter'.
TSANG-P is passed as nil and BIG5-P as t; NAME and TITLE are handed
through unchanged."
  (let ((tsang-p nil)
        (big5-p t))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
842
(defun tsang-cns-converter (dicbuf name title)
  "Convert the dictionary in DICBUF with `tsang-quick-converter'.
TSANG-P is passed as t and BIG5-P as nil; NAME and TITLE are handed
through unchanged."
  (let ((tsang-p t)
        (big5-p nil))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
845
(defun quick-cns-converter (dicbuf name title)
  "Convert the dictionary in DICBUF with `tsang-quick-converter'.
TSANG-P and BIG5-P are both passed as nil; NAME and TITLE are handed
through unchanged."
  (let ((tsang-p nil)
        (big5-p nil))
    (tsang-quick-converter dicbuf name title tsang-p big5-p)))
848
849;; Generate the code for a Quail package in the current buffer from
850;; Pinyin dictionary in the buffer DICBUF. The input method name of
851;; the Quail package is NAME, and the title string is TITLE.
852
;; Append the remainder of a Quail package definition to the end of the
;; current buffer, using the Pinyin dictionary held in buffer DICBUF.
;;
;; Steps, in order:
;;   1. Insert the package docstring, printed with `%S'.
;;   2. Insert the translation-key alist and the trailing `nil' arguments.
;;   3. Insert `(quail-define-rules' followed by the whole of DICBUF, with
;;      each copied line rewritten in place into a rule form.
;;   4. Insert the closing parenthesis.
;;
;; NAME and TITLE are accepted but are not referenced anywhere in this body.
853(defun py-converter (dicbuf name title)
854 (goto-char (point-max))
855 (insert (format "%S\n" "\e$A::WVJdHk!KF4Rt!K\e(B
856
857 \e$AF4Rt7=08\e(B
858
859 \e$AP!P4S"NDWVD84z1m!8F4Rt!97{:E#,\e(B \"u(yu) \e$ATrSC\e(B u: \e$A1mJ>!C\e(B
860
861Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
862
c3ff164a 863Pinyin is the standard roman transliteration method for Chinese.
c063e381
KH
864Pinyin uses a sequence of Latin alphabetic characters for each Chinese
865character. The sequence is made by the combination of the initials
866\(the beginning sounds) and finals (the ending sounds).
867
868 initials: b p m f d t n l z c s zh ch sh r j q x g k h
869 finals: a o e i er ai ei oa ou an en ang eng ong i ia iao ie iu ian in
870 iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun
871
872 (Note: In the correct Pinyin writing, the sequence \"yu\" in the last
873 four finals should be written by the character u-umlaut `\e$A(9\e(B'.)
874
875With this input method, you enter a Chinese character by first
876entering its pinyin spelling.
877
878\\<quail-translation-docstring>
879
880For instance, to input \e$ADc\e(B, you type \"n i C-n 3\". The first \"n i\"
881is a Pinyin, \"C-n\" selects the next group of candidates (each group
882contains at most 10 characters), \"3\" select the third character in
883that group.
884
885This input method supports only Han characters. The related input
886method `chinese-py-punct' is the combination of this method and
887`chinese-punct'; it supports both Han characters and punctuation
888characters.
889
890For double-width GB2312 characters corresponding to ASCII, use the
891input method `chinese-qj'.
892
893The correct Pinyin system specifies tones by diacritical marks, but
894this input method doesn't use them, which results in easy (you don't
895have to know the exact tones), but verbose (many characters are assigned
896to the same key sequence) input. You may also want to try the input
897method `chinese-tonepy' with which you must specify tones by digits
898\(1..5)."))
899 (insert " '((\"\C-?\" . quail-delete-last-char)
900 (\".\" . quail-next-translation)
901 (\">\" . quail-next-translation)
902 (\",\" . quail-prev-translation)
903 (\"<\" . quail-prev-translation))
904 nil nil nil nil)\n\n")
905 (insert "(quail-define-rules\n")
;; Copy the dictionary after point, then rewrite each copied line in
;; place: the leading run of a-z becomes the quoted key, the single
;; character following it is deleted, and the rest of the line becomes
;; the quoted translation string.
906 (let ((pos (point)))
907 (insert-buffer-substring dicbuf)
908 (goto-char pos)
909 (while (not (eobp))
910 (insert "(\"")
911 (skip-chars-forward "a-z")
912 (insert "\" \"")
913 (delete-char 1)
914 (end-of-line)
915 (insert "\")")
916 (forward-line 1)))
917 (insert ")\n"))
918
919;; Generate the code for a Quail package in the current buffer from
920;; Ziranma dictionary in the buffer DICBUF. The input method name of
921;; the Quail package is NAME, and the title string is TITLE.
922
;; Append the remainder of a Quail package definition to the end of the
;; current buffer, using the Ziranma dictionary held in buffer DICBUF.
;;
;; Steps, in order:
;;   1. Read every line of DICBUF after the "%keyname end" line and
;;      collect the translations per key.
;;   2. Normalize each key's translations (see the `maphash' comment below).
;;   3. Sort the (KEY . TRANSLATION) pairs by key.
;;   4. Insert the package docstring, the translation-key alist, and the
;;      `quail-define-rules' form holding one rule per key.
;;
;; NAME and TITLE are accepted but are not referenced anywhere in this body.
;;
;; NOTE(review): `search-forward' signals an error if "%keyname end" is
;; not present in DICBUF.
923(defun ziranma-converter (dicbuf name title)
924 (let (dic)
925 (save-excursion
926 (set-buffer dicbuf)
927 (goto-char (point-min))
928 (search-forward "%keyname end\n")
;; TABLE maps each key to a vector of its translation strings, in
;; dictionary order.
929 (let ((table (make-hash-table :test 'equal))
930 elt pos key trans val)
931 (while (not (eobp))
;; The key is the text up to the first space or tab; the translation
;; is the rest of the line after the separating spaces/tabs.
932 (setq pos (point))
933 (skip-chars-forward "^ \t")
934 (setq key (buffer-substring pos (point)))
935 (skip-chars-forward " \t")
936 (setq trans (vector (buffer-substring (point) (line-end-position))))
937 (setq val (gethash key table))
938 (if val (setq trans (vconcat val trans)))
939 (puthash key trans table)
940 (forward-line 1))
;; Normalize each entry: when every translation of a key is a single
;; character, the vector is collapsed into one string; otherwise the
;; vector of strings is kept as is.
941 (maphash #'(lambda (key trans)
942 (let ((len (length trans))
943 i)
944 (if (and (= len 1) (= (length (aref trans 0)) 1))
945 (setq trans (aref trans 0))
946 (setq i 0)
947 (while (and (< i len)
948 (= (length (aref trans i)) 1))
949 (setq i (1+ i)))
950 (if (= i len)
951 (setq trans (mapconcat 'identity trans "")))))
952 (setq dic (cons (cons key trans) dic)))
953 table)))
954 (setq dic (sort dic (function (lambda (x y) (string< (car x) (car y))))))
955 (goto-char (point-max))
956 (insert (format "%S\n" "\e$A::WVJdHk!K!>WTH;!?!K\e(B
957
958 \e$A<|EL6TUU1m\e(B:
959 \e$A)3)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)7\e(B
960 \e$A)'#Q\e(B \e$A)'#W\e(B \e$A)'#E\e(B \e$A)'#R\e(B \e$A)'#T\e(B \e$A)'#Y\e(B \e$A)'#U\e(Bsh\e$A)'#I\e(Bch\e$A)'#O\e(B \e$A)'#P\e(B \e$A)'\e(B
961 \e$A)'\e(B iu\e$A)'\e(B ua\e$A)'\e(B e\e$A)'\e(B uan\e$A)'\e(B ue\e$A)'\e(B uai\e$A)'\e(B u\e$A)'\e(B i\e$A)'\e(B o\e$A)'\e(B un\e$A)'\e(B
962 \e$A)'\e(B \e$A)'\e(B ia\e$A)'\e(B \e$A)'\e(B van\e$A)'\e(B ve\e$A)'\e(B ing\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B uo\e$A)'\e(B vn\e$A)'\e(B
963 \e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)?\e(B
964 \e$A)'#A\e(B \e$A)'#S\e(B \e$A)'#D\e(B \e$A)'#F\e(B \e$A)'#G\e(B \e$A)'#H\e(B \e$A)'#J\e(B \e$A)'#K\e(B \e$A)'#L\e(B \e$A)'\e(B
965 \e$A)'\e(B a\e$A)'\e(Biong\e$A)'\e(Buang\e$A)'\e(B en\e$A)'\e(B eng\e$A)'\e(B ang\e$A)'\e(B an\e$A)'\e(B ao\e$A)'\e(B ai\e$A)'\e(B
966 \e$A)'\e(B \e$A)'\e(B ong\e$A)'\e(Biang\e$A)'\e(B \e$A)'\e(B ng\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
967 \e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)%)7\e(B
968 \e$A)'#Z\e(B \e$A)'#X\e(B \e$A)'#C\e(B \e$A)'#V\e(Bzh\e$A)'#B\e(B \e$A)'#N\e(B \e$A)'#M\e(B \e$A)'#,\e(B \e$A)'#.\e(B \e$A)'\e(B \e$A#/\e(B \e$A)'\e(B
969 \e$A)'\e(B ei\e$A)'\e(B ie\e$A)'\e(B iao\e$A)'\e(B ui\e$A)'\e(B ou\e$A)'\e(B in\e$A)'\e(B ian\e$A)'G0R3)':sR3)'7{:E)'\e(B
970 \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B v\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
971 \e$A);)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)?\e(B
972
973
974Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').
975
976Pinyin is the standard roman transliteration method for Chinese.
977For the details of Pinyin system, see the documentation of the input
978method `chinese-py'.
979
980Unlike the standard spelling of Pinyin, in this input method all
981initials and finals are assigned to single keys (see the above table).
982For instance, the initial \"ch\" is assigned to the key `i', the final
983\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and \e$AGaIy\e(B are
984assigned to the keys `q', `w', `e', `r', `t' respectively.
985
986\\<quail-translation-docstring>
987
988To input one-letter words, you type 4 keys, the first two for the
989Pinyin of the letter, next one for tone, and the last one is always a
990quote ('). For instance, \"vsq'\" input \e$AVP\e(B. Exceptions are these
991letters. You can input them just by typing a single key.
992
993 Character: \e$A04\e(B \e$A2;\e(B \e$A4N\e(B \e$A5D\e(B \e$A6~\e(B \e$A7"\e(B \e$A8v\e(B \e$A:M\e(B \e$A3v\e(B \e$A<0\e(B \e$A?I\e(B \e$AAK\e(B \e$AC;\e(B
994 Key: a b c d e f g h i j k l m
995 Character: \e$ADc\e(B \e$AE7\e(B \e$AF,\e(B \e$AF_\e(B \e$AHK\e(B \e$AH}\e(B \e$AK{\e(B \e$AJG\e(B \e$AWE\e(B \e$ANR\e(B \e$AP!\e(B \e$AR;\e(B \e$ATZ\e(B
996 Key: n o p q r s t u v w x y z
997
998To input two-letter words, you have two ways. One way is to type 4
999keys, two for the first Pinyin, two for the second Pinyin. For
1000instance, \"vsgo\" inputs \e$AVP9z\e(B. Another way is to type 3 keys: 2
1001initials of two letters, and quote ('). For instance, \"vg'\" also
1002inputs \e$AVP9z\e(B.
1003
1004To input three-letter words, you type 4 keys: initials of three
1005letters, and the last is quote ('). For instance, \"bjy'2\" inputs \e$A11\e(B
1006\e$A>)Q<\e(B (the last `2' is to select one of the candidates).
1007
1008To input words of more than three letters, you type 4 keys, initials
1009of the first three letters and the last letter. For instance,
1010\"bjdt\" inputs \e$A11>)5gJSL(\e(B.
1011
1012To input symbols and punctuations, type `/' followed by one of `a' to
1013`z', then select one of the candidates."))
1014 (insert " '((\"\C-?\" . quail-delete-last-char)
1015 (\".\" . quail-next-translation)
1016 (\"[\" . quail-next-translation)
1017 (\",\" . quail-prev-translation)
1018 (\"]\" . quail-prev-translation))
1019 nil nil nil nil)\n\n")
1020 (insert "(quail-define-rules\n")
1021 (dolist (elt dic)
1022 (insert (format "(%S %S)\n" (car elt) (cdr elt))))
1023 (insert ")\n")))
1024
a4c4011b
AC
1025;; Generate the code for a Quail package in the current buffer from a
1026;; CTLau or CTLau-b5 dictionary in the buffer DICBUF. The input
1027;; method name of the Quail package is NAME, and the title string is
1028;; TITLE. DESCRIPTION is the string shown by describe-input-method.
1029
(defun ctlau-converter (dicbuf name title description)
  "Append Quail package code built from the CTLau dictionary in DICBUF.
The text is added at the end of the current buffer.  DESCRIPTION is
printed with `%S' as the package docstring, followed by the
translation-key alist and a `quail-define-rules' form.

The dictionary proper is the part of DICBUF after the line sequence
\"#\\n#<hr>\\n\" and before the last line of the buffer.  Each of its
lines is rewritten into a rule: the leading key (capital letters and
backslashes) is downcased, a backslash at the start of a key is
doubled, the character following the key is deleted, and the rest of
the line becomes the translation string.

NAME and TITLE are accepted but are not used here."
  (goto-char (point-max))
  (insert (format "%S\n" description))
  (insert " '((\"\C-?\" . quail-delete-last-char)
 (\".\" . quail-next-translation)
 (\">\" . quail-next-translation)
 (\",\" . quail-prev-translation)
 (\"<\" . quail-prev-translation))
 nil nil nil nil)\n\n")
  (insert "(quail-define-rules\n")
  (let ((rules-start (point))
        region-beg region-end)
    ;; Locate the dictionary: it starts below a horizontal rule and
    ;; ends at the second to last line of the HTML file.
    (with-current-buffer dicbuf
      (goto-char (point-min))
      (search-forward "#\n#<hr>\n")
      (setq region-beg (point))
      (goto-char (point-max))
      (forward-line -1)
      (setq region-end (point)))
    (insert-buffer-substring dicbuf region-beg region-end)
    ;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as
    ;; hollow boxes when the original characters in CTLau.html have no
    ;; Big5 equivalent.  Remove them.
    (goto-char rules-start)
    (while (search-forward "\e$(0!{\e(B" nil t)
      (delete-char -1))
    ;; Turn every copied line into a rule form.
    (goto-char rules-start)
    (while (not (eobp))
      (insert "(\"")
      ;; A backslash at the start of a key must be doubled so that it
      ;; survives inside the string literal.
      (when (char-equal (following-char) ?\\)
        (insert "\\"))
      (let ((key-beg (point)))
        (skip-chars-forward "\\\\A-Z")
        (downcase-region key-beg (point)))
      (insert "\" \"")
      (delete-char 1)
      (end-of-line)
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))
1076
;; Converter for the GB variant of the CTLau dictionary: calls
;; `ctlau-converter' on DICBUF with NAME and TITLE passed through and
;; the fixed description string below as DESCRIPTION.
1077(defun ctlau-gb-converter (dicbuf name title)
1078 (ctlau-converter dicbuf name title
1079"\e$A::WVJdHk!KAuN}OiJ=TARt!K\e(B
1080
1081 \e$AAuN}OiJ=TASoW"Rt7=08\e(B
1082 Sidney Lau's Cantonese transcription scheme as described in his book
1083 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
1084 This file was prepared by Fung Fung Lee (\e$A@n7c7e\e(B).
1085 Originally converted from CTCPS3.tit
1086 Last modified: June 2, 1993.
1087
4e7e1f03 1088 Some infrequent GB characters are accessed by typing \\, followed by
a4c4011b
AC
1089 the Cantonese romanization of the respective radical (\e$A2?JW\e(B)."))
1090
;; Converter for the CTLau-b5 dictionary: calls `ctlau-converter' on
;; DICBUF with NAME and TITLE passed through and the fixed description
;; string below as DESCRIPTION.
1091(defun ctlau-b5-converter (dicbuf name title)
1092 (ctlau-converter dicbuf name title
1093"\e$(0KH)tTT&,!(N,Tg>A*#Gn5x!(\e(B
1094
1095 \e$(0N,Tg>A*#GnM$0D5x'J7{\e(B
1096 Sidney Lau's Cantonese transcription scheme as described in his book
1097 \"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
1098 This file was prepared by Fung Fung Lee (\e$(0,XFS76\e(B).
1099 Originally converted from CTCPS3.tit
1100 Last modified: June 2, 1993.
1101
4e7e1f03 1102 Some infrequent characters are accessed by typing \\, followed by
a4c4011b
AC
1103 the Cantonese romanization of the respective radical (\e$(0?f5}\e(B)."))
1104
(defun miscdic-convert (filename &optional dirname)
  "Convert a dictionary file FILENAME into a Quail package.
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved.

Every entry of `quail-misc-package-ext-info' is read as
\(NAME TITLE DICFILE CODING QUAILFILE CONVERTER COPYRIGHT).  An entry
is processed when DICFILE matches FILENAME and FILENAME is newer than
the corresponding QUAILFILE; otherwise a message says that QUAILFILE
is up to date.  CONVERTER is called with the dictionary buffer, NAME
and TITLE, with the output buffer current.

Signal an error if FILENAME is not readable."
  (interactive "FInput method dictionary file: ")
  (or (file-readable-p filename)
      (error "%s does not exist" filename))
  (let ((default-buffer-file-coding-system 'iso-2022-7bit))
    (dolist (slot quail-misc-package-ext-info)
      (let ((name (car slot))
            (title (nth 1 slot))
            (dicfile (nth 2 slot))
            (coding (nth 3 slot))
            (quailfile (nth 4 slot))
            (converter (nth 5 slot))
            (copyright (nth 6 slot)))
        (when (and (or (string-match dicfile filename)
                       ;; MS-DOS filesystem truncates file names to 8+3
                       ;; limits, so "cangjie-table.cns" becomes
                       ;; "cangjie-.cns", and the above string-match
                       ;; fails.  Give DOS users a chance...
                       (and (fboundp 'msdos-long-file-names)
                            (not (msdos-long-file-names))
                            (string-match (dos-8+3-filename dicfile)
                                          filename)))
                   (or (file-newer-than-file-p
                        filename (expand-file-name quailfile dirname))
                       (progn
                         (message "%s is up to date" quailfile)
                         nil)))
          (message "Converting %s to %s..." dicfile quailfile)
          (with-temp-file (expand-file-name quailfile dirname)
            (set-buffer-file-coding-system 'iso-2022-7bit)
            (insert ";; Quail package `" name "' -*- coding:iso-2022-7bit; -*-\n")
            (insert ";; Generated by the command `miscdic-convert'\n")
            (insert ";; Date: " (current-time-string) "\n")
            (insert ";; Source dictionary file: " dicfile "\n")
            (insert ";; Copyright notice of the source file\n")
            (insert ";;------------------------------------------------------\n")
            (insert copyright "\n")
            (insert ";;------------------------------------------------------\n")
            (insert "\n")
            (insert ";;; Code:\n\n")
            (insert "(require 'quail)\n")
            (insert "(quail-define-package \"" name "\" \""
                    ;; Dictionaries read with a GB2312 coding system
                    ;; belong to the Chinese-GB language environment;
                    ;; labelling them Chinese-CNS was wrong.  Any other
                    ;; non-Big5 coding keeps the previous Chinese-CNS
                    ;; label.
                    ;; NOTE(review): the list below covers the usual
                    ;; names of the GB2312 coding system; confirm it
                    ;; includes the symbol actually used in
                    ;; `quail-misc-package-ext-info'.
                    (cond ((eq coding 'big5) "Chinese-BIG5")
                          ((memq coding '(cn-gb-2312 cn-gb gb2312
                                          euc-china euc-cn
                                          chinese-iso-8bit))
                           "Chinese-GB")
                          (t "Chinese-CNS"))
                    "\" \"" title "\" t\n")
            (let* ((coding-system-for-read coding)
                   (dicbuf (find-file-noselect filename)))
              ;; Kill the dictionary buffer even when the converter
              ;; signals an error, so that it is not left behind.
              (unwind-protect
                  (funcall converter dicbuf name title)
                (kill-buffer dicbuf))))
          (message "Converting %s to %s...done" dicfile quailfile))))))
1162
(defun batch-miscdic-convert ()
  "Run `miscdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
If there's an argument \"-dir\", the next argument specifies a directory
to store generated Quail packages.

Each remaining argument that names a directory is expanded to the
files it contains; every other argument is converted as a file.
Emacs is killed with exit code 0 once all arguments are processed."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-miscdic-convert' should be used only with -batch"))
  (let ((dir default-directory))
    (while command-line-args-left
      (let ((arg (pop command-line-args-left)))
        (cond
         ((string= arg "-dir")
          ;; "-dir" must be followed by its directory.  Previously a
          ;; trailing "-dir" or "-dir DIR" fell through with a nil file
          ;; name and died inside `file-directory-p'.
          (or command-line-args-left
              (error "Option `-dir' requires a directory argument"))
          (setq dir (pop command-line-args-left)))
         ((file-directory-p arg)
          (dolist (file (directory-files arg t nil t))
            (miscdic-convert file dir)))
         (t
          (miscdic-convert arg dir))))))
  (kill-emacs 0))
1187
5cdaf2a5
KH
1188;; Local Variables:
1189;; coding: iso-2022-7bit
1190;; End:
60370d40 1191
ab5796a9 1192;;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3
60370d40 1193;;; titdic-cnv.el ends here