| 1 | ;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*- |
| 2 | |
| 3 | ;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004, |
| 4 | ;; 2005, 2006 Free Software Foundation, Inc. |
| 5 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 6 | ;; 2005, 2006 |
| 7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 8 | ;; Registration Number H14PRO021 |
| 9 | |
| 10 | ;; Keywords: Quail, TIT, cxterm |
| 11 | |
| 12 | ;; This file is part of GNU Emacs. |
| 13 | |
| 14 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 15 | ;; it under the terms of the GNU General Public License as published by |
| 16 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 17 | ;; any later version. |
| 18 | |
| 19 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 22 | ;; GNU General Public License for more details. |
| 23 | |
| 24 | ;; You should have received a copy of the GNU General Public License |
| 25 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 26 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 27 | ;; Boston, MA 02110-1301, USA. |
| 28 | |
| 29 | ;;; Commentary: |
| 30 | |
| 31 | ;; Convert cxterm dictionary (of TIT format) to quail-package. |
| 32 | ;; |
| 33 | ;; Usage (within Emacs): |
| 34 | ;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR> |
| 35 | ;; Usage (from shell): |
| 36 | ;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\ |
| 37 | ;; [-dir DIR] [DIR | FILE] ... |
| 38 | ;; |
| 39 | ;; When you run titdic-convert within Emacs, you have a chance to |
| 40 | ;; modify arguments of `quail-define-package' before saving the |
| 41 | ;; converted file. For instance, you are likely to modify TITLE, |
| 42 | ;; DOCSTRING, and KEY-BINDINGS. |
| 43 | |
| 44 | ;; Cxterm dictionary file (*.tit) is a line-oriented text (English, |
| 45 | ;; Chinese, Japanese, and Korean) file. The whole file consists of |
| 46 | ;; two parts, the definition part (`header' hereafter) followed by |
| 47 | ;; the dictionary part (`body' hereafter). All lines beginning with |
| 48 | ;; '#' are ignored. |
| 49 | ;; |
| 50 | ;; Each line in the header part has two fields, KEY and VALUE. These |
| 51 | ;; fields are separated by one or more white characters. |
| 52 | ;; |
| 53 | ;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS. |
| 54 | ;; These fields are separated by one or more white characters. |
| 55 | ;; |
| 56 | ;; See the manual page of `tit2cit' of cxterm distribution for more |
| 57 | ;; detail. |
| 58 | ;; |
| 59 | ;; Near the end of this file, we also have a few other tools to convert |
| 60 | ;; miscellaneous dictionaries. |
| 61 | |
| 62 | ;;; Code: |
| 63 | |
| 64 | (require 'quail) |
| 65 | |
(defvar tit-encode-list
  '(("GB"   euc-china "Chinese-GB")
    ("BIG5" cn-big5   "Chinese-BIG5")
    ("JIS"  euc-japan "Japanese")
    ("KS"   euc-kr    "Korean"))
  "Alist of the values accepted for the \"ENCODE:\" key of a TIT dictionary.
Each element has the form (ENCODE-VALUE CODING-SYSTEM LANGUAGE-ENVIRONMENT),
where CODING-SYSTEM is the Emacs coding system used to decode the
dictionary and LANGUAGE-ENVIRONMENT is the language environment name
written into the generated Quail package.")
| 73 | |
| 74 | ;; Alist of input method names and the corresponding title and extra |
| 75 | ;; docstring. For each input method generated from a TIT dictionary, |
| 76 | ;; a docstring is automatically generated from the comments in the |
| 77 | ;; dictionary. The extra docstring in this alist is to add more |
| 78 | ;; information. |
| 79 | ;; The command describe-input-method shows the automatically generated |
| 80 | ;; docstring, then an extra docstring while replacing the form \<VAR> |
| 81 | ;; by the value of variable VAR. For instance, the form |
| 82 | ;; \<quail-translation-docstring> is replaced by a description about |
| 83 | ;; how to select a translation from a list of candidates. |
| 84 | |
;; Each element is (PACKAGE-NAME TITLE [EXTRA-DOCSTRING]).  TITLE is
;; used as the title of the generated Quail package; EXTRA-DOCSTRING,
;; when present, is appended to the docstring generated from the
;; dictionary header.  The strings are user-visible documentation, so
;; keep their spelling clean.
(defvar quail-cxterm-package-ext-info
  '(("chinese-4corner" "\e$(0(?-F\e(B")
    ("chinese-array30" "\e$(0#R#O\e(B")
    ("chinese-ccdospy" "\e$AKuF4\e(B"
     "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312').

Pinyin is the standard Roman transliteration method for Chinese.
For the detail of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'. The
difference is that you type a single key for these Pinyin spellings.
Pinyin: zh en eng ang ch an ao ai ong sh ing yu(\e$A(9\e(B)
keyseq: a f g h i j k l s u y v
For example:
Chinese: \e$A0!\e(B \e$A9{\e(B \e$AVP\e(B \e$AND\e(B \e$A9b\e(B \e$ASq\e(B \e$AH+\e(B
Pinyin: a guo zhong wen guang yu quan
Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6

\\<quail-translation-docstring>

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-ecdict" "\e$(05CKH\e(B"
     "In this input method, you enter a Chinese (Big5) character or word
by typing the corresponding English word. For example, if you type
\"computer\", \"\e$(0IZH+\e(B\" is input.

\\<quail-translation-docstring>")

    ("chinese-etzy" "\e$(06/0D\e(B"
     "Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of phonetic symbol. One to three Zhuyin symbols
compose one Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 1, 2, 3, or 4 specifying a tone (SPC:\e$(0?v(N\e(B, 1:\e$(0M=Vy\e(B, 2:\e$(0Dm(N\e(B, 3: \e$(0&9Vy\e(B,
4:\e$(0(+Vy\e(B).

\\<quail-translation-docstring>")

    ("chinese-punct-b5" "\e$(0O:\e(BB"
     "Input method for Chinese punctuations and symbols of Big5
\(`chinese-big5-1' and `chinese-big5-2').")

    ("chinese-punct" "\e$A1j\e(BG"
     "Input method for Chinese punctuations and symbols of GB2312
\(`chinese-gb2312').")

    ("chinese-py-b5" "\e$(03<\e(BB"
     "Pinyin base input method for Chinese Big5 characters
\(`chinese-big5-1', `chinese-big5-2').

This input method works almost the same way as `chinese-py' (which
see).

This input method supports only Han characters. The more convenient
method is `chinese-py-punct-b5', which is the combination of this
method and `chinese-punct-b5' and which supports both Han characters
and punctuation/symbols.

For double-width Big5 characters corresponding to ASCII, use the input
method `chinese-qj-b5'.

The input method `chinese-py' and `chinese-tonepy' are also Pinyin
based, but for the character set GB2312 (`chinese-gb2312').")

    ("chinese-qj-b5" "\e$(0)A\e(BB")

    ("chinese-qj" "\e$AH+\e(BG")

    ("chinese-sw" "\e$AJWN2\e(B"
     "Radical base input method for Chinese charset GB2312 (`chinese-gb2312').

In this input method, you enter a Chinese character by typing two
keys. The first key corresponds to the first (\e$AJW\e(B) radical, the second
key corresponds to the last (\e$AN2\e(B) radical. The correspondence of keys
and radicals is as below:

first radical:
a b c d e f g h i j k l m n o p q r s t u v w x y z
\e$APD\e(B \e$AZ"\e(B \e$AJ,\e(B \e$AX<\e(B \e$A;p\e(B \e$A?Z\e(B \e$A^P\e(B \e$Ac_\e(B \e$AZ%\e(B \e$A\3\e(B \e$AXi\e(B \e$AD>\e(B \e$Alj\e(B \e$Ab;\e(B \e$ATB\e(B \e$Afy\e(B \e$AJ/\e(B \e$AMu\e(B \e$A0K\e(B \e$AX/\e(B \e$AHU\e(B \e$AeA\e(B \e$Aak\e(B \e$AVq\e(B \e$AR;\e(B \e$AHK\e(B
last radical:
a b c d e f g h i j k l m n o p q r s t u v w x y z
\e$ASV\e(B \e$AI=\e(B \e$AMA\e(B \e$A56\e(B \e$AZb\e(B \e$A?Z\e(B \e$ARB\e(B \e$Aqb\e(B \e$A4s\e(B \e$A6!\e(B \e$A[L\e(B \e$Ala\e(B \e$AJ.\e(B \e$A4u\e(B \e$AXg\e(B \e$ACE\e(B \e$A=q\e(B \e$AX-\e(B \e$AE.\e(B \e$ARR\e(B \e$A`m\e(B \e$AP!\e(B \e$A3'\e(B \e$A3f\e(B \e$A_.\e(B \e$A27\e(B

\\<quail-translation-docstring>")

    ("chinese-tonepy" "\e$A5wF4\e(B"
     "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
For the details of Pinyin system, see the documentation of the input
method `chinese-py'.

This input method works almost the same way as `chinese-py'. The
difference is that you must type 1..5 after each Pinyin spelling to
specify a tone (1:\e$ARuF=\e(B, 2:\e$AQtF=\e(B, 3:\e$AIOIy\e(B, 4:\e$AOBIy\e(B, 5:\e$AGaIy\e(B).

\\<quail-translation-docstring>

For instance, to input \e$ADc\e(B, you type \"n i 3 3\", the first \"n i\" is
a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
the third character from the candidate list.

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.")

    ("chinese-zozy" "\e$(0I\0D\e(B"
     "Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
`chinese-big5-2').

Zhuyin is a kind of a phonetic symbol. One to three Zhuyin symbols
compose a Chinese character.

In this input method, you enter a Chinese character by first typing
keys corresponding to Zhuyin symbols (see the above table) followed by
SPC, 6, 3, 4, or 7 specifying a tone (SPC:\e$(0?v(N\e(B, 6:\e$(0Dm(N\e(B, 3:\e$(0&9Vy\e(B, 4:\e$(0(+Vy\e(B,
7:\e$(0M=Vy\e(B).

\\<quail-translation-docstring>")))
| 210 | |
(defsubst tit-read-key-value ()
  "Return the value found at point on the current line, or nil if none.
The value is the run of characters at point up to the next space, tab,
carriage return or newline.  It is read as the contents of a Lisp
string literal, so backslash escapes in it are interpreted."
  (when (looking-at "[^ \t\r\n]+")
    (let ((literal (concat "\"" (match-string 0) "\"")))
      (car (read-from-string literal)))))
| 215 | |
(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name for the TIT dictionary FILENAME.
The result is the non-directory part of FILENAME with its extension
replaced by \".el\", expanded with `expand-file-name' relative to
DIRNAME.  For instance, \".../ZOZY.tit\" -> \"ZOZY.el\"."
  ;; Strip the actual extension instead of blindly dropping the last
  ;; four characters: that signalled an error for names shorter than
  ;; four characters and mangled names that do not end in ".tit".
  (expand-file-name
   (concat (file-name-sans-extension (file-name-nondirectory filename))
           ".el")
   dirname))
| 222 | |
(defvar tit-dictionary t
  "Non-nil while a character dictionary is processed.
The value is nil if we are processing a phrase dictionary.")

(defvar tit-encode nil
  "Value of the \"ENCODE:\" key of the TIT dictionary being converted.")

(defvar tit-default-encode "GB"
  "Encoding name used when a TIT dictionary has no \"ENCODE:\" line.")
| 227 | |
(defun tit-generate-key-bindings (keys function-symbol)
  "Print KEY-BINDINGS elements that bind each character of KEYS.
KEYS is a string; for every eligible character in it an element of
the form (\"KEY\" . FUNCTION-SYMBOL), suitable for the KEY-BINDINGS
argument of `quail-define-package', is printed with `princ'.

A control character is eligible only if `quail-translation-keymap'
binds it to `quail-execute-non-quail-command'; any other character is
eligible if its code is at most 127.  Elements after the first printed
one are preceded by a newline and a space."
  (let ((first t))
    (dolist (key (append keys nil))
      (or first (princ "\n "))
      (when (if (< key ?\s)
                (eq (lookup-key quail-translation-keymap
                                (char-to-string key))
                    'quail-execute-non-quail-command)
              (<= key 127))
        (princ (cons (cond ((< key ?\s)
                            (format "\"\\C-%c\"" (+ key ?@)))
                           ((< key 127)
                            ;; Let the printer quote the key so that `"'
                            ;; and `\\' yield a readable string literal.
                            (prin1-to-string (char-to-string key)))
                           (t "\"\\C-?\""))
                     function-symbol))
        (setq first nil)))))
| 250 | |
(defun tit-process-header (filename)
  "Convert the header part of a TIT dictionary into Quail package code.
The header is scanned from the beginning of the accessible portion of
the current buffer to its end.  FILENAME is the name of the dictionary
file; the package name is \"chinese-\" followed by its downcased base
name without extension.

Everything is printed with `princ'/`prin1': a comment block quoting
the original header, then an opening `quail-define-package' call built
from the header keys (PROMPT, COMMENT, KEYPROMPT, MULTICHOICE,
BACKSPACE, MOVERIGHT, MOVELEFT) and from `quail-cxterm-package-ext-info'.
`tit-dictionary' is set according to the BEGINDICTIONARY/BEGINPHRASE line."
  (message "Processing header part...")
  (goto-char (point-min))

  ;; At first, generate header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
                  "chinese-"
                  ;; Strip the real extension; chopping four characters
                  ;; fails for short names and names without ".tit".
                  (file-name-sans-extension
                   (downcase (file-name-nondirectory filename)))))
        ;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package) (princ "' -*- coding:iso-2022-7bit; -*-\n")
    (princ ";; Generated by the command `titdic-convert'\n;;\tDate: ")
    (princ (current-time-string))
    (princ "\n;;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n;;; Comment:\n\n")
    (princ ";; Byte-compile this file again after any modification.\n\n")
    (princ ";;; Start of the header of original TIT dictionary.\n\n")

    ;; Dispatch on the first character of each line, record the value
    ;; of every recognized key, and echo the line as a comment.
    (while (not (eobp))
      (let ((ch (following-char))
            (pos (point)))
        (cond ((= ch ?C)                ; COMMENT
               (cond ((looking-at "COMMENT")
                      (let ((pos (match-end 0)))
                        (end-of-line)
                        (setq tit-comments (cons (buffer-substring pos (point))
                                                 tit-comments))))))
              ((= ch ?M)                ; MULTICHOICE, MOVERIGHT, MOVELEFT
               (cond ((looking-at "MULTICHOICE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-multichoice (looking-at "YES")))
                     ((looking-at "MOVERIGHT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveright (tit-read-key-value)))
                     ((looking-at "MOVELEFT:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-moveleft (tit-read-key-value)))))
              ((= ch ?P)                ; PROMPT
               (cond ((looking-at "PROMPT:[ \t]*")
                      (goto-char (match-end 0))
                      ;; A "PROMPT:" line without a value reads as nil;
                      ;; keep the prompt a string so that the code below
                      ;; and the title/docstring generation do not signal.
                      (setq tit-prompt (or (tit-read-key-value) ""))
                      ;; Some TIT dictionaries that are encoded by
                      ;; euc-china contain an invalid character at the tail.
                      (if (> (length tit-prompt) 0)
                          (let* ((last (aref tit-prompt
                                             (1- (length tit-prompt))))
                                 (split (split-char last)))
                            (if (or (eq (nth 1 split) 32)
                                    (eq (nth 2 split) 32))
                                (setq tit-prompt
                                      (substring tit-prompt 0 -1))))))))
              ((= ch ?B)                ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
               (cond ((looking-at "BACKSPACE:[ \t]*")
                      (goto-char (match-end 0))
                      (setq tit-backspace (tit-read-key-value)))
                     ((looking-at "BEGINDICTIONARY")
                      (setq tit-dictionary t))
                     ((looking-at "BEGINPHRASE")
                      (setq tit-dictionary nil))))
              ((= ch ?K)                ; KEYPROMPT
               (cond ((looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
                      (let ((key-char (match-string 1)))
                        (goto-char (match-end 0))
                        ;; The key may be written as a backslash escape.
                        (if (string-match "\\\\[0-9]+" key-char)
                            (setq key-char
                                  (car (read-from-string (format "\"%s\""
                                                                 key-char)))))
                        (setq tit-keyprompt
                              (cons (cons key-char (tit-read-key-value))
                                    tit-keyprompt)))))))
        (end-of-line)
        (princ ";; ")
        (princ (buffer-substring pos (point)))
        (princ "\n")
        (forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (nth 1 (assoc package quail-cxterm-package-ext-info))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      (princ (or title
                 (if (string-match "[:\e$A!K\e$(0!(!J\e(B]+\\([^:\e$A!K\e$(0!(!K\e(B]+\\)" tit-prompt)
                     (substring tit-prompt (match-beginning 1) (match-end 1))
                   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    (if tit-keyprompt
        (progn
          (princ " '(")
          (while tit-keyprompt
            (princ " ")
            (princ (format "(%d . \"%s\")\n"
                           (string-to-char (car (car tit-keyprompt)))
                           (cdr (car tit-keyprompt))))
            (setq tit-keyprompt (cdr tit-keyprompt)))
          (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (let ((doc (concat tit-prompt "\n"))
          (comments (if tit-comments
                        (mapconcat 'identity (nreverse tit-comments) "\n")))
          (doc-ext (nth 2 (assoc package quail-cxterm-package-ext-info))))
      (if comments
          (setq doc (concat doc "\n" comments "\n")))
      (if doc-ext
          (setq doc (concat doc "\n" doc-ext "\n")))
      (prin1 doc)
      (terpri))

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    ;; NOTE(review): the GUIDANCE loop above pops `tit-keyprompt' down to
    ;; nil, so this test always takes the " nil nil" branch.  Left as is
    ;; to keep the generated packages unchanged; confirm the intent.
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
| 397 | |
(defsubst tit-flush-translations (key translations)
  "Print the rule for KEY and TRANSLATIONS, followed by a newline.
Every backslash followed by three digits in KEY is replaced by what the
Lisp reader yields for that escape inside a string literal.

If `tit-dictionary' is non-nil, TRANSLATIONS is printed as is.
Otherwise TRANSLATIONS is a list, which is destructively reversed and
printed as a vector."
  (let ((escape "\\\\[0-9][0-9][0-9]")
        (start 0)
        (pieces nil))
    ;; Collect, in reverse order, the literal text before each escape
    ;; and the decoded escape itself.
    (while (string-match escape key start)
      (push (substring key start (match-beginning 0)) pieces)
      (push (car (read-from-string
                  (concat "\"" (match-string 0 key) "\"")))
            pieces)
      (setq start (match-end 0)))
    ;; KEY is rebuilt only when at least one escape was found.
    (when pieces
      (setq key (apply 'concat
                       (nreverse (cons (substring key start) pieces))))))
  (prin1 (list key
               (if tit-dictionary
                   translations
                 (vconcat (nreverse translations)))))
  (princ "\n"))
| 415 | |
(defun tit-process-body ()
  "Convert the body part of a TIT dictionary into `quail-define-rules'.
The body is scanned from point to the end of the current buffer and
the resulting function call is printed with `princ'.

Lines that are empty or start with `#', and entries without any
translation, are ignored.  Translations of consecutive lines with the
same key sequence are merged and printed by `tit-flush-translations'
when the key sequence changes.  If `tit-dictionary' is non-nil only
the first field after the key is taken from each line and the fields
are concatenated into one string; otherwise every field up to a `#'
comment is collected as a separate phrase."
  (message "Formatting translation rules...")
  (let ((prev-key "")
        ch key translations pos)
    (princ "(quail-define-rules\n")
    (while (null (eobp))
      (setq ch (following-char))
      (if (or (= ch ?#) (= ch ?\n))
          ;; Comment or empty line.
          (forward-line 1)
        (setq pos (point))
        (skip-chars-forward "^ \t\n")
        (setq key (buffer-substring pos (point)))
        (skip-chars-forward " \t")
        (setq ch (following-char))
        (if (or (= ch ?#) (= ch ?\n))
            ;; This entry contains no translations.  Let's ignore it.
            (forward-line 1)
          ;; A new key sequence starts: emit what was collected for the
          ;; previous one.
          (or (string= key prev-key)
              (progn
                (if translations
                    (tit-flush-translations prev-key translations))
                (setq translations nil
                      prev-key key)))
          (if tit-dictionary
              (progn
                (setq pos (point))
                (skip-chars-forward "^ \t#\n")
                (setq translations
                      (if translations
                          (concat translations
                                  (buffer-substring pos (point)))
                        (buffer-substring pos (point)))))
            ;; Phrase dictionary: accumulate the phrases in reverse order.
            (while (not (eolp))
              (setq pos (point))
              (skip-chars-forward "^ \t\n")
              (setq translations (cons (buffer-substring pos (point))
                                       translations))
              (skip-chars-forward " \t")
              (setq ch (following-char))
              ;; The rest of the line is a comment.
              (if (= ch ?#) (end-of-line))))
          (forward-line 1))))

    ;; Emit the translations of the last key sequence.
    (if translations
        (tit-flush-translations prev-key translations))
    (princ ")\n")))
| 465 | |
;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Generate a Quail package from the TIT dictionary file FILENAME.
The name of the generated file is computed by
`tit-make-quail-package-file-name'.  The optional argument DIRNAME,
if specified, is the directory under which the package is saved."
  (interactive "FTIT dictionary file: ")
  (let ((coding-system-for-write 'iso-2022-7bit))
    (with-temp-file (tit-make-quail-package-file-name filename dirname)
      (set-buffer-file-coding-system 'iso-2022-7bit)
      ;; Whatever the converters print lands in the package buffer.
      (let ((standard-output (current-buffer)))
        (with-temp-buffer
          ;; Read the dictionary without any decoding.
          (set-buffer-multibyte nil)
          (let ((coding-system-for-read 'no-conversion))
            (insert-file-contents (expand-file-name filename)))

          ;; Decode the buffer contents from the encoding named by the
          ;; value of the key "ENCODE:", looked for in the header only.
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary doesn't have body part"))
          (let ((header-limit (point))
                entry)
            (goto-char (point-min))
            (setq tit-encode
                  (if (re-search-forward "^ENCODE:[ \t]*" header-limit t)
                      (progn
                        (goto-char (match-end 0))
                        (tit-read-key-value))
                    tit-default-encode))
            (setq entry (assoc tit-encode tit-encode-list))
            (unless entry
              (error "Invalid ENCODE: value in TIT dictionary"))
            (message "Decoding with coding system %s..." (nth 1 entry))
            (goto-char (point-min))
            (decode-coding-region (point-min) (point-max) (nth 1 entry)))

          ;; Move point to the line that starts the body part.
          (goto-char (point-min))
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary can't be decoded correctly"))

          ;; The header is processed with both buffers in multibyte mode
          ;; and with the dictionary buffer narrowed to the header.
          (with-current-buffer standard-output
            (set-buffer-multibyte t))
          (set-buffer-multibyte t)
          (forward-line 1)
          (narrow-to-region (point-min) (point))
          (tit-process-header filename)
          (widen)

          ;; The body is processed in unibyte mode, for speed.
          (with-current-buffer standard-output
            (set-buffer-multibyte nil))
          (set-buffer-multibyte nil)
          (tit-process-body))))))
| 520 | |
;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
generate Quail package file \"XXX.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".

Each remaining argument is a TIT dictionary file or a directory, in
which case all files matching \"\\\\.tit$\" in it are converted.  If the
first argument is \"-dir\", the argument after it is the directory in
which the generated files are saved.  A dictionary is converted only
if it is newer than its Quail package, unless FORCE is non-nil.
Emacs is killed with exit code 0 when done."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-titdic-convert' should be used only with -batch"))
  (if (string= (car command-line-args-left) "-h")
      (progn
        ;; The generated file keeps the case of the dictionary name
        ;; (XXX.tit -> XXX.el), so say so in the usage text.
        (message "To convert XXX.tit and YYY.tit into XXX.el and YYY.el:")
        (message "  %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
        (message "To convert XXX.tit into DIR/XXX.el:")
        (message "  %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
    (let (targetdir filename files file)
      ;; An optional leading "-dir DIR" selects the output directory.
      (if (string= (car command-line-args-left) "-dir")
          (progn
            (setq command-line-args-left (cdr command-line-args-left))
            (setq targetdir (car command-line-args-left))
            (setq command-line-args-left (cdr command-line-args-left))))
      (while command-line-args-left
        (setq filename (expand-file-name (car command-line-args-left)))
        (if (file-directory-p filename)
            (progn
              (message "Converting all tit files in the directory %s" filename)
              (setq files (directory-files filename t "\\.tit$")))
          (setq files (list filename)))
        (while files
          (setq file (expand-file-name (car files)))
          ;; Skip dictionaries whose package is already up to date.
          (when (or force
                    (file-newer-than-file-p
                     file (tit-make-quail-package-file-name file targetdir)))
            (message "Converting %s to quail-package..." file)
            (titdic-convert file targetdir))
          (setq files (cdr files)))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message "  %% emacs -batch -f batch-byte-compile XXX.el")))
  (kill-emacs 0))
| 563 | |
| 564 | \f |
| 565 | ;;; Converter of miscellaneous dictionaries other than TIT format. |
| 566 | |
| 567 | ;; Alist keyed by input method name; each entry tells how to build one |
| 568 | ;; Quail package from a non-TIT dictionary. Each element has this form: |
| 569 | ;; (INPUT-METHOD-NAME ;; Name of the input method. |
| 570 | ;; INPUT-METHOD-TITLE ;; Title string of the input method. |
| 571 | ;; DICFILE ;; Name of the source dictionary file. |
| 572 | ;; CODING ;; Coding system of the dictionary file. |
| 573 | ;; QUAILFILE ;; Name of the Quail package file. |
| 574 | ;; CONVERTER ;; Function to generate the Quail package. |
| 575 | ;; COPYRIGHT-NOTICE ;; Copyright notice of the source dictionary, |
| 576 | ;; ) ;; written as `;;' comment lines. |
| 577 | |
| 578 | (defvar quail-misc-package-ext-info |
| 579 | '(("chinese-b5-tsangchi" "\e$(06A\e(BB" |
| 580 | "cangjie-table.b5" big5 "tsang-b5.el" |
| 581 | tsang-b5-converter |
| 582 | "\ |
| 583 | ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw> |
| 584 | ;; # |
| 585 | ;; # Permission to copy and distribute both modified and |
| 586 | ;; # unmodified versions is granted without royalty provided |
| 587 | ;; # this notice is preserved.") |
| 588 | |
| 589 | ("chinese-b5-quick" "\e$(0X|\e(BB" |
| 590 | "cangjie-table.b5" big5 "quick-b5.el" |
| 591 | quick-b5-converter |
| 592 | "\ |
| 593 | ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw> |
| 594 | ;; # |
| 595 | ;; # Permission to copy and distribute both modified and |
| 596 | ;; # unmodified versions is granted without royalty provided |
| 597 | ;; # this notice is preserved.") |
| 598 | |
| 599 | ("chinese-cns-tsangchi" "\e$(GT?\e(BC" |
| 600 | "cangjie-table.cns" iso-2022-cn-ext "tsang-cns.el" |
| 601 | tsang-cns-converter |
| 602 | "\ |
| 603 | ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw> |
| 604 | ;; # |
| 605 | ;; # Permission to copy and distribute both modified and |
| 606 | ;; # unmodified versions is granted without royalty provided |
| 607 | ;; # this notice is preserved.") |
| 608 | |
| 609 | ("chinese-cns-quick" "\e$(Gv|\e(BC" |
| 610 | "cangjie-table.cns" iso-2022-cn-ext "quick-cns.el" |
| 611 | quick-cns-converter |
| 612 | "\ |
| 613 | ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw> |
| 614 | ;; # |
| 615 | ;; # Permission to copy and distribute both modified and |
| 616 | ;; # unmodified versions is granted without royalty provided |
| 617 | ;; # this notice is preserved.") |
| 618 | |
| 619 | ("chinese-py" "\e$AF4\e(BG" |
| 620 | "pinyin.map" cn-gb-2312 "PY.el" |
| 621 | py-converter |
| 622 | "\ |
| 623 | ;; \"pinyin.map\" is included in a free package called CCE. It is |
| 624 | ;; available at: |
| 625 | ;; http://ftp.debian.org/debian/dists/potato/main |
| 626 | ;; /source/utils/cce_0.36.orig.tar.gz |
| 627 | ;; This package contains the following copyright notice. |
| 628 | ;; |
| 629 | ;; |
| 630 | ;; Copyright (C) 1999, Rui He, herui@cs.duke.edu |
| 631 | ;; |
| 632 | ;; |
| 633 | ;; CCE(Console Chinese Environment) 0.32 |
| 634 | ;; |
| 635 | ;; CCE is free software; you can redistribute it and/or modify it under the |
| 636 | ;; terms of the GNU General Public License as published by the Free Software |
| 637 | ;; Foundation; either version 1, or (at your option) any later version. |
| 638 | ;; |
| 639 | ;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY |
| 640 | ;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 641 | ;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| 642 | ;; details. |
| 643 | ;; |
| 644 | ;; You should have received a copy of the GNU General Public License along with |
| 645 | ;; CCE; see the file COPYING. If not, write to the Free Software Foundation, |
| 646 | ;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.") |
| 647 | |
| 648 | ("chinese-ziranma" "\e$AWTH;\e(B" |
| 649 | "ziranma.cin" cn-gb-2312 "ZIRANMA.el" |
| 650 | ziranma-converter |
| 651 | "\ |
| 652 | ;; \"ziranma.cin\" is included in a free package called CCE. It is |
| 653 | ;; available at: |
| 654 | ;; http://ftp.debian.org/debian/dists/potato/main |
| 655 | ;; /source/utils/cce_0.36.orig.tar.gz |
| 656 | ;; This package contains the following copyright notice. |
| 657 | ;; |
| 658 | ;; |
| 659 | ;; Copyright (C) 1999, Rui He, herui@cs.duke.edu |
| 660 | ;; |
| 661 | ;; |
| 662 | ;; CCE(Console Chinese Environment) 0.32 |
| 663 | ;; |
| 664 | ;; CCE is free software; you can redistribute it and/or modify it under the |
| 665 | ;; terms of the GNU General Public License as published by the Free Software |
| 666 | ;; Foundation; either version 1, or (at your option) any later version. |
| 667 | ;; |
| 668 | ;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY |
| 669 | ;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 670 | ;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| 671 | ;; details. |
| 672 | ;; |
| 673 | ;; You should have received a copy of the GNU General Public License along with |
| 674 | ;; CCE; see the file COPYING. If not, write to the Free Software Foundation, |
| 675 | ;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.") |
| 676 | |
| 677 | ("chinese-ctlau" "\e$AAuTA\e(B" |
| 678 | "CTLau.html" cn-gb-2312 "CTLau.el" |
| 679 | ctlau-gb-converter |
| 680 | "\ |
| 681 | ;; \"CTLau.html\" is available at: |
| 682 | ;; |
| 683 | ;; http://umunhum.stanford.edu/~lee/chicomp/CTLau.html |
| 684 | ;; |
| 685 | ;; It contains the following copyright notice: |
| 686 | ;; |
| 687 | ;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu) |
| 688 | ;; # |
| 689 | ;; # This program is free software; you can redistribute it and/or |
| 690 | ;; # modify it under the terms of the GNU General Public License |
| 691 | ;; # as published by the Free Software Foundation; either version 2 |
| 692 | ;; # of the License, or any later version. |
| 693 | ;; # |
| 694 | ;; # This program is distributed in the hope that it will be useful, |
| 695 | ;; # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 696 | ;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 697 | ;; # GNU General Public License for more details. |
| 698 | ;; # |
| 699 | ;; # You should have received a copy of the GNU General Public License |
| 700 | ;; # along with this program; if not, write to the Free Software Foundation, |
| 701 | ;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.") |
| 702 | |
| 703 | ("chinese-ctlaub" "\e$(0N,Gn\e(B" |
| 704 | "CTLau-b5.html" big5 "CTLau-b5.el" |
| 705 | ctlau-b5-converter |
| 706 | "\ |
| 707 | ;; \"CTLau-b5.html\" is available at: |
| 708 | ;; |
| 709 | ;; http://umunhum.stanford.edu/~lee/chicomp/CTLau-b5.html |
| 710 | ;; |
| 711 | ;; It contains the following copyright notice: |
| 712 | ;; |
| 713 | ;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu) |
| 714 | ;; # |
| 715 | ;; # This program is free software; you can redistribute it and/or |
| 716 | ;; # modify it under the terms of the GNU General Public License |
| 717 | ;; # as published by the Free Software Foundation; either version 2 |
| 718 | ;; # of the License, or any later version. |
| 719 | ;; # |
| 720 | ;; # This program is distributed in the hope that it will be useful, |
| 721 | ;; # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 722 | ;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 723 | ;; # GNU General Public License for more details. |
| 724 | ;; # |
| 725 | ;; # You should have received a copy of the GNU General Public License |
| 726 | ;; # along with this program; if not, write to the Free Software Foundation, |
| 727 | ;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.") |
| 728 | )) |
| 729 | |
;; Generate the code for a Quail package in the current buffer from the
;; Tsang dictionary in the buffer DICBUF.  The input method name of the
;; Quail package is NAME, and the title string is TITLE.

;; If TSANG-P is non-nil, generate the \e$(06AQo\e(B input method.  Otherwise
;; generate \e$(0X|/y\e(B (a simple version of \e$(06AQo\e(B).  If BIG5-P is non-nil, the
;; input method is for inputting Big5 characters.  Otherwise the input
;; method is for inputting CNS characters.
| 738 | |
(defun tsang-quick-converter (dicbuf name title tsang-p big5-p)
  "Append the body of a Tsang/Quick Quail package to the current buffer.
DICBUF is the buffer holding the source dictionary.  If TSANG-P is
non-nil, the whole key of each dictionary entry is used; otherwise a
key longer than one letter is shortened to its first and last letters.
If BIG5-P is non-nil, the Big5 variants of the docstring and of the
punctuation rules are emitted, otherwise the CNS variants.
NAME and TITLE are not used by this function."
  (let ((fulltitle (cond ((and tsang-p big5-p) "\e$(06AQo\e(B")
                         (tsang-p "\e$(GT?on\e(B")
                         (big5-p "\e$(0X|/y\e(B")
                         (t "\e$(Gv|Mx\e(B")))
        entries)
    (goto-char (point-max))
    ;; Docstring of the generated package; both %s slots get FULLTITLE.
    (insert (format (if big5-p
                        "\"\e$(0&d'GTT&,!J\e(B%s\e$(0!K\e(BBIG5

\e$(0KHM$\e(B%s\e$(0TT&,WoOu\e(B

[Q \e$(0'D\e(B] [W \e$(0(q\e(B] [E \e$(0'V\e(B] [R \e$(0&H\e(B] [T \e$(0'>\e(B] [Y \e$(0&4\e(B] [U \e$(0&U\e(B] [I \e$(0'B\e(B] [O \e$(0&*\e(B] [P \e$(0'A\e(B]

[A \e$(0'K\e(B] [S \e$(0&T\e(B] [D \e$(0'N\e(B] [F \e$(0'W\e(B] [G \e$(0&I\e(B] [H \e$(0*M\e(B] [J \e$(0&3\e(B] [L \e$(0&d\e(B]

[Z ] [X \e$(0[E\e(B] [C \e$(01[\e(B] [V \e$(0&M\e(B] [B \e$(0'M\e(B] [N \e$(0&_\e(B] [M \e$(0&"\e(B]

\\\\<quail-translation-docstring>\"\n"
                      "\"\e$(GDcEFrSD+!J\e(B%s\e$(G!K\e(BCNS

\e$(GiGk#\e(B%s\e$(GrSD+uomu\e(B

[Q \e$(GEC\e(B] [W \e$(GFp\e(B] [E \e$(GEU\e(B] [R \e$(GDG\e(B] [T \e$(GE=\e(B] [Y \e$(GD3\e(B] [U \e$(GDT\e(B] [I \e$(GEA\e(B] [O \e$(GD)\e(B] [P \e$(GE@\e(B]

[A \e$(GEJ\e(B] [S \e$(GDS\e(B] [D \e$(GEM\e(B] [F \e$(GEV\e(B] [G \e$(GDH\e(B] [H \e$(GHL\e(B] [J \e$(GD2\e(B] [L \e$(GDc\e(B]

[Z ] [X \e$(GyE\e(B] [C \e$(GOZ\e(B] [V \e$(GDL\e(B] [B \e$(GEL\e(B] [N \e$(GD^\e(B] [M \e$(GD!\e(B]

\\\\<quail-translation-docstring>\"\n")
                    fulltitle fulltitle))
    (insert " '((\".\" . quail-next-translation-block)
(\",\" . quail-prev-translation-block))
nil nil)\n\n")
    (insert "(quail-define-rules\n")
    (with-current-buffer dicbuf
      ;; Handle double CR line ends, which result when checking out of
      ;; CVS on MS-Windows.
      (goto-char (point-min))
      (while (re-search-forward "\r\r$" nil t)
        (replace-match ""))
      ;; The dictionary proper starts on the line containing "A440".
      (goto-char (point-min))
      (search-forward "A440")
      (beginning-of-line)
      (let ((table (make-hash-table :test 'equal)))
        (while (not (eobp))
          ;; Skip the 5-character prefix; the translation character
          ;; is the one right after it.
          (forward-char 5)
          (let ((trans (char-to-string (following-char)))
                code-start code-end key previous)
            (re-search-forward "[A-Z]+$" nil t)
            (setq code-start (match-beginning 0)
                  code-end (match-end 0))
            (setq key (downcase
                       (if (and (not tsang-p)
                                (> (- code-end code-start) 1))
                           ;; Quick method: keep first and last letter only.
                           (string (char-after code-start)
                                   (char-after (1- code-end)))
                         (match-string 0))))
            ;; Accumulate all translations sharing one key, in file order.
            (setq previous (gethash key table))
            (puthash key (if previous (concat previous trans) trans) table)
            (forward-line 1)))
        (maphash (lambda (key val) (push (cons key val) entries))
                 table)))
    (setq entries (sort entries
                        (lambda (a b) (string< (car a) (car b)))))
    (dolist (entry entries)
      (insert (format "(%S\t%S)\n" (car entry) (cdr entry))))
    ;; Punctuation rules: each key is typed as "z" plus the ASCII symbol.
    ;; Element 1 is the Big5 string, element 2 the CNS string.
    (let ((punctuations '((";" "\e$(0!'!2!"!#!.!/\e(B" "\e$(G!'!2!"!#!.!/\e(B")
                          (":" "\e$(0!(!+!3!%!$!&!0!1\e(B" "\e$(G!(!+!3!%!$!&!0!1\e(B")
                          ("'" "\e$(0!e!d\e(B" "\e$(G!e!d\e(B")
                          ("\"" "\e$(0!g!f!h!i!q\e(B" "\e$(G!g!f!h!i!q\e(B")
                          ("\\" "\e$(0"`"b#M\e(B" "\e$(G"`"b#M\e(B")
                          ("|" "\e$(0!6!8!:"^\e(B" "\e$(G!6!8!:"^\e(B")
                          ("/" "\e$(0"_"a#L\e(B" "\e$(G"_"a#L\e(B")
                          ("?" "\e$(0!)!4\e(B" "\e$(G!)!4\e(B")
                          ("<" "\e$(0!R"6"A!T"H\e(B" "\e$(G!R"6"A!T"H\e(B")
                          (">" "\e$(0!S"7"B!U\e(B" "\e$(G!S"7"B!U\e(B")
                          ("[" "\e$(0!F!J!b!H!L!V!Z!X!\\e(B" "\e$(G!F!J!b!H!L!V!Z!X!\\e(B")
                          ("]" "\e$(0!G!K!c!I!M!W![!Y!]\e(B" "\e$(G!G!K!c!I!M!W![!Y!]\e(B")
                          ("{" "\e$(0!B!`!D\e(B " "\e$(G!B!`!D\e(B ")
                          ("}" "\e$(0!C!a!E\e(B" "\e$(G!C!a!E\e(B")
                          ("`" "\e$(0!j!k\e(B" "\e$(G!j!k\e(B")
                          ("~" "\e$(0"D"+",!<!=\e(B" "\e$(G"D"+",!<!=\e(B")
                          ("!" "\e$(0!*!5\e(B" "\e$(G!*!5\e(B")
                          ("@" "\e$(0"i"n\e(B" "\e$(G"i"n\e(B")
                          ("#" "\e$(0!l"-\e(B" "\e$(G!l"-\e(B")
                          ("$" "\e$(0"c"l\e(B" "\e$(G"c"l\e(B")
                          ("%" "\e$(0"h"m\e(B" "\e$(G"h"m\e(B")
                          ("&" "\e$(0!m".\e(B" "\e$(G!m".\e(B")
                          ("*" "\e$(0!n"/!o!w!x\e(B" "\e$(G!n"/!o!w!x\e(B")
                          ("(" "\e$(0!>!^!@\e(B" "\e$(G!>!^!@\e(B")
                          (")" "\e$(0!?!_!A\e(B" "\e$(G!?!_!A\e(B")
                          ("-" "\e$(0!7!9"#"$"1"@\e(B" "\e$(G!7!9"#"$"1"@\e(B")
                          ("_" "\e$(0"%"&\e(B" "\e$(G"%"&\e(B")
                          ("=" "\e$(0"8"C\e(B" "\e$(G"8"C\e(B")
                          ("+" "\e$(0"0"?\e(B" "\e$(G"0"?\e(B"))))
      (dolist (punct punctuations)
        (insert (format "(%S %S)\n"
                        (concat "z" (car punct))
                        (nth (if big5-p 1 2) punct)))))
    (insert ")\n")))
| 838 | |
(defun tsang-b5-converter (dicbuf name title)
  "Generate the Tsang input method for Big5 from the dictionary in DICBUF.
DICBUF, NAME and TITLE are handed on to `tsang-quick-converter'."
  (tsang-quick-converter dicbuf name title
                         t              ; TSANG-P
                         t))            ; BIG5-P
| 841 | |
(defun quick-b5-converter (dicbuf name title)
  "Generate the Quick (simplified Tsang) input method for Big5 from DICBUF.
DICBUF, NAME and TITLE are handed on to `tsang-quick-converter'."
  (tsang-quick-converter dicbuf name title
                         nil            ; TSANG-P
                         t))            ; BIG5-P
| 844 | |
(defun tsang-cns-converter (dicbuf name title)
  "Generate the Tsang input method for CNS from the dictionary in DICBUF.
DICBUF, NAME and TITLE are handed on to `tsang-quick-converter'."
  (tsang-quick-converter dicbuf name title
                         t              ; TSANG-P
                         nil))          ; BIG5-P
| 847 | |
(defun quick-cns-converter (dicbuf name title)
  "Generate the Quick (simplified Tsang) input method for CNS from DICBUF.
DICBUF, NAME and TITLE are handed on to `tsang-quick-converter'."
  (tsang-quick-converter dicbuf name title
                         nil            ; TSANG-P
                         nil))          ; BIG5-P
| 850 | |
;; Generate the code for a Quail package in the current buffer from the
;; Pinyin dictionary in the buffer DICBUF.  The input method name of
;; the Quail package is NAME, and the title string is TITLE.
| 854 | |
(defun py-converter (dicbuf name title)
  "Append the body of the Pinyin Quail package to the current buffer.
The contents of DICBUF are copied in and every line is rewritten in
place as one rule: the leading run of lowercase letters becomes the
key and the rest of the line, after one separator character, becomes
the translation string.  NAME and TITLE are not used by this function."
  (goto-char (point-max))
  (insert
   ;; Docstring of the generated package, printed as a Lisp string.
   (format "%S\n" "\e$A::WVJdHk!KF4Rt!K\e(B

\e$AF4Rt7=08\e(B

\e$AP!P4S"NDWVD84z1m!8F4Rt!97{:E#,\e(B \"u(yu) \e$ATrSC\e(B u: \e$A1mJ>!C\e(B

Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
Pinyin uses a sequence of Latin alphabetic characters for each Chinese
character. The sequence is made by the combination of the initials
\(the beginning sounds) and finals (the ending sounds).

initials: b p m f d t n l z c s zh ch sh r j q x g k h
finals: a o e i er ai ei oa ou an en ang eng ong i ia iao ie iu ian in
iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun

(Note: In the correct Pinyin writing, the sequence \"yu\" in the last
four finals should be written by the character u-umlaut `\e$A(9\e(B'.)

With this input method, you enter a Chinese character by first
entering its pinyin spelling.

\\<quail-translation-docstring>

For instance, to input \e$ADc\e(B, you type \"n i C-n 3\". The first \"n i\"
is a Pinyin, \"C-n\" selects the next group of candidates (each group
contains at most 10 characters), \"3\" select the third character in
that group.

This input method supports only Han characters. The related input
method `chinese-py-punct' is the combination of this method and
`chinese-punct'; it supports both Han characters and punctuation
characters.

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.

The correct Pinyin system specifies tones by diacritical marks, but
this input method doesn't use them, which results in easy (you don't
have to know the exact tones), but verbose (many characters are assigned
to the same key sequence) input. You may also want to try the input
method `chinese-tonepy' with which you must specify tones by digits
\(1..5).")
   ;; Remaining arguments of `quail-define-package'.
   " '((\"\C-?\" . quail-delete-last-char)
(\".\" . quail-next-translation)
(\">\" . quail-next-translation)
(\",\" . quail-prev-translation)
(\"<\" . quail-prev-translation))
nil nil nil nil)\n\n"
   "(quail-define-rules\n")
  (let ((rules-start (point)))
    (insert-buffer-substring dicbuf)
    (goto-char rules-start)
    ;; Turn each copied line `KEY<sep>TRANS' into `("KEY" "TRANS")'.
    (while (not (eobp))
      (insert "(\"")
      (skip-chars-forward "a-z")
      (insert "\" \"")
      ;; Drop the single character that separated key and translation.
      (delete-char 1)
      (end-of-line)
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))
| 920 | |
;; Generate the code for a Quail package in the current buffer from the
;; Ziranma dictionary in the buffer DICBUF.  The input method name of
;; the Quail package is NAME, and the title string is TITLE.
| 924 | |
(defun ziranma-converter (dicbuf name title)
  "Append the body of the Ziranma Quail package to the current buffer.
The dictionary in DICBUF is read from just after the line
\"%keyname end\".  Each line gives a key, whitespace, and a
translation; translations sharing a key are collected in file order.
When every translation of a key is a single character they are emitted
as one string, otherwise as a vector of strings.  Rules are written
sorted by key.  NAME and TITLE are not used by this function."
  (let (entries)
    (with-current-buffer dicbuf
      (goto-char (point-min))
      (search-forward "%keyname end\n")
      (let ((table (make-hash-table :test 'equal)))
        (while (not (eobp))
          (let* ((key-start (point))
                 (key (progn (skip-chars-forward "^ \t")
                             (buffer-substring key-start (point))))
                 (trans (progn (skip-chars-forward " \t")
                               (vector (buffer-substring
                                        (point) (line-end-position)))))
                 (previous (gethash key table)))
            ;; Append to whatever was already recorded for this key.
            (puthash key
                     (if previous (vconcat previous trans) trans)
                     table)
            (forward-line 1)))
        (maphash (lambda (key trans)
                   (let ((all-single t))
                     (mapc (lambda (str)
                             (unless (= (length str) 1)
                               (setq all-single nil)))
                           trans)
                     ;; Only single-character candidates can be packed
                     ;; into one string; anything else stays a vector.
                     (push (cons key
                                 (if all-single
                                     (mapconcat 'identity trans "")
                                   trans))
                           entries)))
                 table)))
    (setq entries (sort entries
                        (lambda (a b) (string< (car a) (car b)))))
    (goto-char (point-max))
    (insert
     ;; Docstring of the generated package, printed as a Lisp string.
     (format "%S\n" "\e$A::WVJdHk!K!>WTH;!?!K\e(B

\e$A<|EL6TUU1m\e(B:
\e$A)3)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)7\e(B
\e$A)'#Q\e(B \e$A)'#W\e(B \e$A)'#E\e(B \e$A)'#R\e(B \e$A)'#T\e(B \e$A)'#Y\e(B \e$A)'#U\e(Bsh\e$A)'#I\e(Bch\e$A)'#O\e(B \e$A)'#P\e(B \e$A)'\e(B
\e$A)'\e(B iu\e$A)'\e(B ua\e$A)'\e(B e\e$A)'\e(B uan\e$A)'\e(B ue\e$A)'\e(B uai\e$A)'\e(B u\e$A)'\e(B i\e$A)'\e(B o\e$A)'\e(B un\e$A)'\e(B
\e$A)'\e(B \e$A)'\e(B ia\e$A)'\e(B \e$A)'\e(B van\e$A)'\e(B ve\e$A)'\e(B ing\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B uo\e$A)'\e(B vn\e$A)'\e(B
\e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)?\e(B
\e$A)'#A\e(B \e$A)'#S\e(B \e$A)'#D\e(B \e$A)'#F\e(B \e$A)'#G\e(B \e$A)'#H\e(B \e$A)'#J\e(B \e$A)'#K\e(B \e$A)'#L\e(B \e$A)'\e(B
\e$A)'\e(B a\e$A)'\e(Biong\e$A)'\e(Buang\e$A)'\e(B en\e$A)'\e(B eng\e$A)'\e(B ang\e$A)'\e(B an\e$A)'\e(B ao\e$A)'\e(B ai\e$A)'\e(B
\e$A)'\e(B \e$A)'\e(B ong\e$A)'\e(Biang\e$A)'\e(B \e$A)'\e(B ng\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
\e$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)%)7\e(B
\e$A)'#Z\e(B \e$A)'#X\e(B \e$A)'#C\e(B \e$A)'#V\e(Bzh\e$A)'#B\e(B \e$A)'#N\e(B \e$A)'#M\e(B \e$A)'#,\e(B \e$A)'#.\e(B \e$A)'\e(B \e$A#/\e(B \e$A)'\e(B
\e$A)'\e(B ei\e$A)'\e(B ie\e$A)'\e(B iao\e$A)'\e(B ui\e$A)'\e(B ou\e$A)'\e(B in\e$A)'\e(B ian\e$A)'G0R3)':sR3)'7{:E)'\e(B
\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B v\e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B \e$A)'\e(B
\e$A);)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)?\e(B


Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
For the details of Pinyin system, see the documentation of the input
method `chinese-py'.

Unlike the standard spelling of Pinyin, in this input method all
initials and finals are assigned to single keys (see the above table).
For instance, the initial \"ch\" is assigned to the key `i', the final
\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and \e$AGaIy\e(B are
assigned to the keys `q', `w', `e', `r', `t' respectively.

\\<quail-translation-docstring>

To input one-letter words, you type 4 keys, the first two for the
Pinyin of the letter, next one for tone, and the last one is always a
quote ('). For instance, \"vsq'\" input \e$AVP\e(B. Exceptions are these
letters. You can input them just by typing a single key.

Character: \e$A04\e(B \e$A2;\e(B \e$A4N\e(B \e$A5D\e(B \e$A6~\e(B \e$A7"\e(B \e$A8v\e(B \e$A:M\e(B \e$A3v\e(B \e$A<0\e(B \e$A?I\e(B \e$AAK\e(B \e$AC;\e(B
Key: a b c d e f g h i j k l m
Character: \e$ADc\e(B \e$AE7\e(B \e$AF,\e(B \e$AF_\e(B \e$AHK\e(B \e$AH}\e(B \e$AK{\e(B \e$AJG\e(B \e$AWE\e(B \e$ANR\e(B \e$AP!\e(B \e$AR;\e(B \e$ATZ\e(B
Key: n o p q r s t u v w x y z

To input two-letter words, you have two ways. One way is to type 4
keys, two for the first Pinyin, two for the second Pinyin. For
instance, \"vsgo\" inputs \e$AVP9z\e(B. Another way is to type 3 keys: 2
initials of two letters, and quote ('). For instance, \"vg'\" also
inputs \e$AVP9z\e(B.

To input three-letter words, you type 4 keys: initials of three
letters, and the last is quote ('). For instance, \"bjy'2\" inputs \e$A11\e(B
\e$A>)Q<\e(B (the last `2' is to select one of the candidates).

To input words of more than three letters, you type 4 keys, initials
of the first three letters and the last letter. For instance,
\"bjdt\" inputs \e$A11>)5gJSL(\e(B.

To input symbols and punctuations, type `/' followed by one of `a' to
`z', then select one of the candidates.")
     ;; Remaining arguments of `quail-define-package'.
     " '((\"\C-?\" . quail-delete-last-char)
(\".\" . quail-next-translation)
(\"[\" . quail-next-translation)
(\",\" . quail-prev-translation)
(\"]\" . quail-prev-translation))
nil nil nil nil)\n\n"
     "(quail-define-rules\n")
    (dolist (entry entries)
      (insert (format "(%S %S)\n" (car entry) (cdr entry))))
    (insert ")\n")))
| 1026 | |
| 1027 | ;; Generate the code for a Quail package in the current buffer from a |
| 1028 | ;; CTLau or CTLau-b5 dictionary in the buffer DICBUF. The input |
| 1029 | ;; method name of the Quail package is NAME, and the title string is |
| 1030 | ;; TITLE. DESCRIPTION is the string shown by describe-input-method. |
| 1031 | |
(defun ctlau-converter (dicbuf name title description)
  "Append the body of a CTLau Quail package to the current buffer.
DICBUF holds the CTLau or CTLau-b5 HTML dictionary and DESCRIPTION is
the docstring written into the package.  The dictionary lines are
copied in and each is rewritten in place as one rule whose key is
downcased.  NAME and TITLE are not used by this function."
  (goto-char (point-max))
  (insert (format "%S\n" description)
          ;; Remaining arguments of `quail-define-package'.
          " '((\"\C-?\" . quail-delete-last-char)
(\".\" . quail-next-translation)
(\">\" . quail-next-translation)
(\",\" . quail-prev-translation)
(\"<\" . quail-prev-translation))
nil nil nil nil)\n\n"
          "(quail-define-rules\n")
  (let ((rules-start (point))
        ;; Find the dictionary, which starts below a horizontal rule
        ;; and ends at the second to last line in the HTML file.
        (bounds (with-current-buffer dicbuf
                  (goto-char (point-min))
                  (search-forward "#\n#<hr>\n")
                  (let ((start (point)))
                    (goto-char (point-max))
                    (forward-line -1)
                    (cons start (point))))))
    (insert-buffer-substring dicbuf (car bounds) (cdr bounds))
    ;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as
    ;; hollow boxes when the original characters in CTLau.html from
    ;; which the file is converted have no Big5 equivalent.  Go
    ;; through and delete them.
    (goto-char rules-start)
    (while (search-forward "\e$(0!{\e(B" nil t)
      (delete-char -1))
    ;; Rewrite each line as `("key" "translation")'.  Uppercase keys
    ;; are downcased, and a backslash at the beginning of a key is
    ;; doubled so that it survives being read back as a Lisp string.
    (goto-char rules-start)
    (while (not (eobp))
      (insert "(\"")
      (when (char-equal (following-char) ?\\)
        (insert "\\"))
      (let ((key-start (point)))
        (skip-chars-forward "\\\\A-Z")
        (downcase-region key-start (point)))
      (insert "\" \"")
      ;; Drop the single character that separated key and translation.
      (delete-char 1)
      (end-of-line)
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))
| 1078 | |
(defun ctlau-gb-converter (dicbuf name title)
  "Generate the CTLau Quail package body from the GB dictionary in DICBUF.
DICBUF, NAME and TITLE are handed on to `ctlau-converter' together
with the docstring for the generated input method."
  (let ((description "\e$A::WVJdHk!KAuN}OiJ=TARt!K\e(B

\e$AAuN}OiJ=TASoW"Rt7=08\e(B
Sidney Lau's Cantonese transcription scheme as described in his book
\"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
This file was prepared by Fung Fung Lee (\e$A@n7c7e\e(B).
Originally converted from CTCPS3.tit
Last modified: June 2, 1993.

Some infrequent GB characters are accessed by typing \\, followed by
the Cantonese romanization of the respective radical (\e$A2?JW\e(B)."))
    (ctlau-converter dicbuf name title description)))
| 1092 | |
(defun ctlau-b5-converter (dicbuf name title)
  "Generate the CTLau Quail package body from the Big5 dictionary in DICBUF.
DICBUF, NAME and TITLE are handed on to `ctlau-converter' together
with the docstring for the generated input method."
  (let ((description "\e$(0KH)tTT&,!(N,Tg>A*#Gn5x!(\e(B

\e$(0N,Tg>A*#GnM$0D5x'J7{\e(B
Sidney Lau's Cantonese transcription scheme as described in his book
\"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
This file was prepared by Fung Fung Lee (\e$(0,XFS76\e(B).
Originally converted from CTCPS3.tit
Last modified: June 2, 1993.

Some infrequent characters are accessed by typing \\, followed by
the Cantonese romanization of the respective radical (\e$(0?f5}\e(B)."))
    (ctlau-converter dicbuf name title description)))
| 1106 | |
(defun miscdic-convert (filename &optional dirname)
  "Convert a dictionary file FILENAME into a Quail package.
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved.

FILENAME is compared against the dictionary file name of every entry
in `quail-misc-package-ext-info'; each entry whose name occurs in
FILENAME, and whose Quail file is older than FILENAME, is converted
with that entry's converter function."
  (interactive "FInput method dictionary file: ")
  (or (file-readable-p filename)
      (error "%s does not exist" filename))
  (let ((default-buffer-file-coding-system 'iso-2022-7bit))
    (dolist (slot quail-misc-package-ext-info)
      (let ((dicfile (nth 2 slot))
            (quailfile (nth 4 slot)))
        (when (and (or
                    ;; DICFILE is a literal file name, so quote it
                    ;; before using it as a regexp.
                    (string-match (regexp-quote dicfile) filename)
                    ;; MS-DOS filesystem truncates file names to 8+3
                    ;; limits, so "cangjie-table.cns" becomes
                    ;; "cangjie-.cns", and the above string-match
                    ;; fails.  Give DOS users a chance...
                    (and (fboundp 'msdos-long-file-names)
                         (not (msdos-long-file-names))
                         (string-match
                          (regexp-quote (dos-8+3-filename dicfile))
                          filename)))
                   (if (file-newer-than-file-p
                        filename (expand-file-name quailfile dirname))
                       t
                     (message "%s is up to date" quailfile)
                     nil))
          (let ((name (car slot))
                (title (nth 1 slot))
                (coding (nth 3 slot))
                (converter (nth 5 slot))
                (copyright (nth 6 slot)))
            (message "Converting %s to %s..." dicfile quailfile)
            (with-temp-file (expand-file-name quailfile dirname)
              (set-buffer-file-coding-system 'iso-2022-7bit)
              (insert ";; Quail package `" name "' -*- coding:iso-2022-7bit; -*-\n")
              (insert ";; Generated by the command `miscdic-convert'\n")
              (insert ";; Date: " (current-time-string) "\n")
              (insert ";; Source dictionary file: " dicfile "\n")
              (insert ";; Copyright notice of the source file\n")
              (insert ";;------------------------------------------------------\n")
              (insert copyright "\n")
              (insert ";;------------------------------------------------------\n")
              (insert "\n")
              (insert ";;; Code:\n\n")
              (insert "(require 'quail)\n")
              (insert "(quail-define-package \"" name "\" \""
                      ;; Language environment of the package.  GB2312
                      ;; dictionaries must not be labelled as CNS.
                      (cond ((eq coding 'big5) "Chinese-BIG5")
                            ((eq coding 'cn-gb-2312) "Chinese-GB")
                            (t "Chinese-CNS"))
                      "\" \"" title "\" t\n")
              ;; Read the dictionary with the coding system recorded
              ;; for it, and make sure its buffer is killed even if
              ;; the converter signals an error.
              (let* ((coding-system-for-read coding)
                     (dicbuf (find-file-noselect filename)))
                (unwind-protect
                    (funcall converter dicbuf name title)
                  (kill-buffer dicbuf))))
            (message "Converting %s to %s...done" dicfile quailfile)))))))
| 1164 | |
(defun batch-miscdic-convert ()
  "Run `miscdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
If there's an argument \"-dir\", the next argument specifies a directory
to store generated Quail packages.
An argument naming a directory stands for all the files directly in it;
subdirectories are skipped."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-miscdic-convert' should be used only with -batch"))
  (let ((dir default-directory)
        filename)
    (while command-line-args-left
      (when (string= (car command-line-args-left) "-dir")
        ;; Consume "-dir DIR".  Keep the previous directory if DIR is
        ;; missing from the command line.
        (setq dir (or (car (cdr command-line-args-left)) dir)
              command-line-args-left (cdr (cdr command-line-args-left))))
      (setq filename (car command-line-args-left)
            command-line-args-left (cdr command-line-args-left))
      (cond ((null filename))   ; "-dir DIR" was the last argument
            ((file-directory-p filename)
             (dolist (file (directory-files filename t nil t))
               ;; The listing includes ".", ".." and subdirectories,
               ;; none of which is a dictionary file.
               (or (file-directory-p file)
                   (miscdic-convert file dir))))
            (t
             (miscdic-convert filename dir)))))
  (kill-emacs 0))
| 1189 | |
| 1190 | ;; Local Variables: |
| 1191 | ;; coding: iso-2022-7bit |
| 1192 | ;; End: |
| 1193 | |
| 1194 | ;;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3 |
| 1195 | ;;; titdic-cnv.el ends here |