| 1 | ;;; ja-dic-cnv.el --- convert a Japanese dictionary (SKK-JISYO.L) to Emacs Lisp |
| 2 | |
| 3 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | ;; 2005, 2006, 2007, 2008, 2009, 2010 |
| 5 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 6 | ;; Registration Number H14PRO021 |
| 7 | |
| 8 | ;; Keywords: i18n, mule, multilingual, Japanese |
| 9 | |
| 10 | ;; This file is part of GNU Emacs. |
| 11 | |
| 12 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 13 | ;; it under the terms of the GNU General Public License as published by |
| 14 | ;; the Free Software Foundation, either version 3 of the License, or |
| 15 | ;; (at your option) any later version. |
| 16 | |
| 17 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;; GNU General Public License for more details. |
| 21 | |
| 22 | ;; You should have received a copy of the GNU General Public License |
| 23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 24 | |
| 25 | ;;; Commentary: |
| 26 | |
| 27 | ;; SKK is a Japanese input method running on Mule created by Masahiko |
| 28 | ;; Sato <masahiko@sato.riec.tohoku.ac.jp>. Here we provide utilities |
| 29 | ;; to handle a dictionary distributed with SKK so that a different |
| 30 | ;; input method (e.g. quail-japanese) can utilize the dictionary. |
| 31 | |
| 32 | ;; The format of SKK dictionary is quite simple. Each line has the |
| 33 | ;; form "KANASTRING /CONV1/CONV2/.../" which means KANASTRING (\e$B2>L>J8\e(B |
| 34 | ;; \e$B;zNs\e(B) can be converted to one of CONVi. CONVi is a Kanji (\e$B4A;z\e(B) |
| 35 | ;; and Kana (\e$B2>L>\e(B) mixed string. |
| 36 | ;; |
| 37 | ;; KANASTRING may have a trailing ASCII letter for Okurigana (\e$BAw$j2>L>\e(B) |
| 38 | ;; information. For instance, the trailing letter `k' means that one |
| 39 | ;; of the following Okurigana is allowed: \e$B$+$-$/$1$3\e(B. So, in that |
| 40 | ;; case, the string "KANASTRING\e$B$/\e(B" can be converted to one of "CONV1\e$B$/\e(B", |
| 41 | ;; CONV2\e$B$/\e(B, ... |
| 42 | |
| 43 | ;;; Code: |
| 44 | |
| 45 | ;; Name of a file to generate from SKK dictionary. |
| 46 | (defvar ja-dic-filename "ja-dic.el") |
| 47 | |
| 48 | (defun skkdic-convert-okuri-ari (skkbuf buf) |
| 49 | (message "Processing OKURI-ARI entries ...") |
| 50 | (goto-char (point-min)) |
| 51 | (with-current-buffer buf |
| 52 | (insert ";; Setting okuri-ari entries.\n" |
| 53 | "(skkdic-set-okuri-ari\n")) |
| 54 | (while (not (eobp)) |
| 55 | (if (/= (following-char) ?>) |
| 56 | (let ((from (point)) |
| 57 | (to (line-end-position))) |
| 58 | (with-current-buffer buf |
| 59 | (insert-buffer-substring skkbuf from to) |
| 60 | (beginning-of-line) |
| 61 | (insert "\"") |
| 62 | (search-forward " ") |
| 63 | (delete-char 1) ; delete the first '/' |
| 64 | (let ((p (point))) |
| 65 | (end-of-line) |
| 66 | (delete-char -1) ; delete the last '/' |
| 67 | (subst-char-in-region p (point) ?/ ? 'noundo)) |
| 68 | (insert "\"\n")))) |
| 69 | |
| 70 | (forward-line 1)) |
| 71 | (with-current-buffer buf |
| 72 | (insert ")\n\n"))) |
| 73 | |
| 74 | (defconst skkdic-postfix-list '(skkdic-postfix-list)) |
| 75 | |
| 76 | (defconst skkdic-postfix-data |
| 77 | '(("\e$B$$$-\e(B" "\e$B9T\e(B") |
| 78 | ("\e$B$,$+$j\e(B" "\e$B78\e(B") |
| 79 | ("\e$B$,$/\e(B" "\e$B3X\e(B") |
| 80 | ("\e$B$,$o\e(B" "\e$B@n\e(B") |
| 81 | ("\e$B$7$c\e(B" "\e$B<R\e(B") |
| 82 | ("\e$B$7$e$&\e(B" "\e$B=8\e(B") |
| 83 | ("\e$B$7$g$&\e(B" "\e$B>^\e(B" "\e$B>k\e(B") |
| 84 | ("\e$B$8$g$&\e(B" "\e$B>k\e(B") |
| 85 | ("\e$B$;$s\e(B" "\e$B@~\e(B") |
| 86 | ("\e$B$@$1\e(B" "\e$B3Y\e(B") |
| 87 | ("\e$B$A$c$/\e(B" "\e$BCe\e(B") |
| 88 | ("\e$B$F$s\e(B" "\e$BE9\e(B") |
| 89 | ("\e$B$H$&$2\e(B" "\e$BF=\e(B") |
| 90 | ("\e$B$I$*$j\e(B" "\e$BDL$j\e(B") |
| 91 | ("\e$B$d$^\e(B" "\e$B;3\e(B") |
| 92 | ("\e$B$P$7\e(B" "\e$B66\e(B") |
| 93 | ("\e$B$O$D\e(B" "\e$BH/\e(B") |
| 94 | ("\e$B$b$/\e(B" "\e$BL\\e(B") |
| 95 | ("\e$B$f$-\e(B" "\e$B9T\e(B"))) |
| 96 | |
| 97 | (defun skkdic-convert-postfix (skkbuf buf) |
| 98 | (message "Processing POSTFIX entries ...") |
| 99 | (goto-char (point-min)) |
| 100 | (with-current-buffer buf |
| 101 | (insert ";; Setting postfix entries.\n" |
| 102 | "(skkdic-set-postfix\n")) |
| 103 | |
| 104 | ;; Initialize SKKDIC-POSTFIX-LIST by predefined data |
| 105 | ;; SKKDIC-POSTFIX-DATA. |
| 106 | (with-current-buffer buf |
| 107 | (let ((l skkdic-postfix-data) |
| 108 | kana candidates entry) |
| 109 | (while l |
| 110 | (setq kana (car (car l)) candidates (cdr (car l))) |
| 111 | (insert "\"" kana) |
| 112 | (while candidates |
| 113 | (insert " " (car candidates)) |
| 114 | (setq entry (lookup-nested-alist (car candidates) |
| 115 | skkdic-postfix-list nil nil t)) |
| 116 | (if (consp (car entry)) |
| 117 | (setcar entry (cons kana (car entry))) |
| 118 | (set-nested-alist (car candidates) (list kana) |
| 119 | skkdic-postfix-list)) |
| 120 | (setq candidates (cdr candidates))) |
| 121 | (insert "\"\n") |
| 122 | (setq l (cdr l))))) |
| 123 | |
| 124 | ;; Search postfix entries. |
| 125 | (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|\e$B!<\e(B\\)+\\) " nil t) |
| 126 | (let ((kana (match-string 1)) |
| 127 | str candidates) |
| 128 | (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/") |
| 129 | (setq str (match-string 1)) |
| 130 | (if (not (member str candidates)) |
| 131 | (setq candidates (cons str candidates))) |
| 132 | (goto-char (match-end 1))) |
| 133 | (with-current-buffer buf |
| 134 | (insert "\"" kana) |
| 135 | (while candidates |
| 136 | (insert " " (car candidates)) |
| 137 | (let ((entry (lookup-nested-alist (car candidates) |
| 138 | skkdic-postfix-list nil nil t))) |
| 139 | (if (consp (car entry)) |
| 140 | (if (not (member kana (car entry))) |
| 141 | (setcar entry (cons kana (car entry)))) |
| 142 | (set-nested-alist (car candidates) (list kana) |
| 143 | skkdic-postfix-list))) |
| 144 | (setq candidates (cdr candidates))) |
| 145 | (insert "\"\n")))) |
| 146 | (with-current-buffer buf |
| 147 | (insert ")\n\n"))) |
| 148 | |
| 149 | (defconst skkdic-prefix-list '(skkdic-prefix-list)) |
| 150 | |
| 151 | (defun skkdic-convert-prefix (skkbuf buf) |
| 152 | (message "Processing PREFIX entries ...") |
| 153 | (goto-char (point-min)) |
| 154 | (with-current-buffer buf |
| 155 | (insert ";; Setting prefix entries.\n" |
| 156 | "(skkdic-set-prefix\n")) |
| 157 | (save-excursion |
| 158 | (while (re-search-forward "^\\(\\(\\cH\\|\e$B!<\e(B\\)+\\)[<>?] " nil t) |
| 159 | (let ((kana (match-string 1)) |
| 160 | str candidates) |
| 161 | (while (looking-at "/\\([^/\n]+\\)/") |
| 162 | (setq str (match-string 1)) |
| 163 | (if (not (member str candidates)) |
| 164 | (setq candidates (cons str candidates))) |
| 165 | (goto-char (match-end 1))) |
| 166 | (with-current-buffer buf |
| 167 | (insert "\"" kana) |
| 168 | (while candidates |
| 169 | (insert " " (car candidates)) |
| 170 | (set-nested-alist (car candidates) kana skkdic-prefix-list) |
| 171 | (setq candidates (cdr candidates))) |
| 172 | (insert "\"\n"))))) |
| 173 | (with-current-buffer buf |
| 174 | (insert ")\n\n"))) |
| 175 | |
| 176 | ;; FROM and TO point the head and tail of "/J../J../.../". |
| 177 | (defun skkdic-get-candidate-list (from to) |
| 178 | (let (candidates) |
| 179 | (goto-char from) |
| 180 | (while (re-search-forward "/[^/ \n]+" to t) |
| 181 | (setq candidates (cons (buffer-substring (1+ (match-beginning 0)) |
| 182 | (match-end 0)) |
| 183 | candidates))) |
| 184 | candidates)) |
| 185 | |
| 186 | ;; Return entry for STR from nested alist ALIST. |
| 187 | (defsubst skkdic-get-entry (str alist) |
| 188 | (car (lookup-nested-alist str alist nil nil t))) |
| 189 | |
| 190 | |
| 191 | (defconst skkdic-word-list '(skkdic-word-list)) |
| 192 | |
| 193 | ;; Return t if substring of STR (between FROM and TO) can be broken up |
| 194 | ;; to chunks all of which can be derived from another entry in SKK |
| 195 | ;; dictionary. SKKBUF is the buffer where the original SKK dictionary |
| 196 | ;; is visited, KANA is the current entry for STR. FIRST is t only if |
| 197 | ;; this is called at top level. |
| 198 | |
| 199 | (defun skkdic-breakup-string (skkbuf kana str from to &optional first) |
| 200 | (let ((len (- to from))) |
| 201 | (or (and (>= len 2) |
| 202 | (let ((min-idx (+ from 2)) |
| 203 | (idx (if first (1- to ) to)) |
| 204 | (found nil)) |
| 205 | (while (and (not found) (>= idx min-idx)) |
| 206 | (let ((kana2-list (skkdic-get-entry |
| 207 | (substring str from idx) |
| 208 | skkdic-word-list))) |
| 209 | (if (or (and (consp kana2-list) |
| 210 | (let ((kana-len (length kana)) |
| 211 | kana2) |
| 212 | (catch 'skkdic-tag |
| 213 | (while kana2-list |
| 214 | (setq kana2 (car kana2-list)) |
| 215 | (if (string-match kana2 kana) |
| 216 | (throw 'skkdic-tag t)) |
| 217 | (setq kana2-list (cdr kana2-list))))) |
| 218 | (or (= idx to) |
| 219 | (skkdic-breakup-string skkbuf kana str |
| 220 | idx to))) |
| 221 | (and (stringp kana2-list) |
| 222 | (string-match kana2-list kana))) |
| 223 | (setq found t) |
| 224 | (setq idx (1- idx))))) |
| 225 | found)) |
| 226 | (and first |
| 227 | (> len 2) |
| 228 | (let ((kana2 (skkdic-get-entry |
| 229 | (substring str from (1+ from)) |
| 230 | skkdic-prefix-list))) |
| 231 | (and (stringp kana2) |
| 232 | (eq (string-match kana2 kana) 0))) |
| 233 | (skkdic-breakup-string skkbuf kana str (1+ from) to)) |
| 234 | (and (not first) |
| 235 | (>= len 1) |
| 236 | (let ((kana2-list (skkdic-get-entry |
| 237 | (substring str from to) |
| 238 | skkdic-postfix-list))) |
| 239 | (and (consp kana2-list) |
| 240 | (let (kana2) |
| 241 | (catch 'skkdic-tag |
| 242 | (while kana2-list |
| 243 | (setq kana2 (car kana2-list)) |
| 244 | (if (string= kana2 |
| 245 | (substring kana (- (length kana2)))) |
| 246 | (throw 'skkdic-tag t)) |
| 247 | (setq kana2-list (cdr kana2-list))))))))))) |
| 248 | |
| 249 | ;; Return list of candidates which excludes some from CANDIDATES. |
| 250 | ;; Excluded candidates can be derived from another entry. |
| 251 | |
| 252 | (defun skkdic-reduced-candidates (skkbuf kana candidates) |
| 253 | (let (elt l) |
| 254 | (while candidates |
| 255 | (setq elt (car candidates)) |
| 256 | (if (or (= (length elt) 1) |
| 257 | (and (string-match "^\\cj" elt) |
| 258 | (not (skkdic-breakup-string skkbuf kana elt 0 (length elt) |
| 259 | 'first)))) |
| 260 | (setq l (cons elt l))) |
| 261 | (setq candidates (cdr candidates))) |
| 262 | (nreverse l))) |
| 263 | |
| 264 | (defvar skkdic-okuri-nasi-entries (list nil)) |
| 265 | (defvar skkdic-okuri-nasi-entries-count 0) |
| 266 | |
| 267 | (defun skkdic-collect-okuri-nasi () |
| 268 | (message "Collecting OKURI-NASI entries ...") |
| 269 | (save-excursion |
| 270 | (let ((prev-ratio 0) |
| 271 | ratio) |
| 272 | (while (re-search-forward "^\\(\\(\\cH\\|\e$B!<\e(B\\)+\\) \\(/\\cj.*\\)/$" |
| 273 | nil t) |
| 274 | (let ((kana (match-string 1)) |
| 275 | (candidates (skkdic-get-candidate-list (match-beginning 3) |
| 276 | (match-end 3)))) |
| 277 | (setq skkdic-okuri-nasi-entries |
| 278 | (cons (cons kana candidates) skkdic-okuri-nasi-entries) |
| 279 | skkdic-okuri-nasi-entries-count |
| 280 | (1+ skkdic-okuri-nasi-entries-count)) |
| 281 | (setq ratio (floor (/ (* (point) 100.0) (point-max)))) |
| 282 | (if (/= ratio prev-ratio) |
| 283 | (progn |
| 284 | (message "collected %2d%% %s ..." ratio kana) |
| 285 | (setq prev-ratio ratio))) |
| 286 | (while candidates |
| 287 | (let ((entry (lookup-nested-alist (car candidates) |
| 288 | skkdic-word-list nil nil t))) |
| 289 | (if (consp (car entry)) |
| 290 | (setcar entry (cons kana (car entry))) |
| 291 | (set-nested-alist (car candidates) (list kana) |
| 292 | skkdic-word-list))) |
| 293 | (setq candidates (cdr candidates)))))))) |
| 294 | |
| 295 | (defun skkdic-convert-okuri-nasi (skkbuf buf) |
| 296 | (message "Processing OKURI-NASI entries ...") |
| 297 | (with-current-buffer buf |
| 298 | (insert ";; Setting okuri-nasi entries.\n" |
| 299 | "(skkdic-set-okuri-nasi\n") |
| 300 | (let ((l (nreverse skkdic-okuri-nasi-entries)) |
| 301 | (count 0) |
| 302 | (prev-ratio 0) |
| 303 | ratio) |
| 304 | (while l |
| 305 | (let ((kana (car (car l))) |
| 306 | (candidates (cdr (car l)))) |
| 307 | (setq ratio (/ (* count 1000) skkdic-okuri-nasi-entries-count) |
| 308 | count (1+ count)) |
| 309 | (if (/= prev-ratio (/ ratio 10)) |
| 310 | (progn |
| 311 | (message "processed %2d%% %s ..." (/ ratio 10) kana) |
| 312 | (setq prev-ratio (/ ratio 10)))) |
| 313 | (if (setq candidates |
| 314 | (skkdic-reduced-candidates skkbuf kana candidates)) |
| 315 | (progn |
| 316 | (insert "\"" kana) |
| 317 | (while candidates |
| 318 | (insert " " (car candidates)) |
| 319 | (setq candidates (cdr candidates))) |
| 320 | (insert "\"\n")))) |
| 321 | (setq l (cdr l)))) |
| 322 | (insert ")\n\n"))) |
| 323 | |
| 324 | (defun skkdic-convert (filename &optional dirname) |
| 325 | "Generate Emacs Lisp file form Japanese dictionary file FILENAME. |
| 326 | The format of the dictionary file should be the same as SKK dictionaries. |
| 327 | Optional argument DIRNAME if specified is the directory name under which |
| 328 | the generated Emacs Lisp is saved. |
| 329 | The name of generated file is specified by the variable `ja-dic-filename'." |
| 330 | (interactive "FSKK dictionary file: ") |
| 331 | (message "Reading file \"%s\" ..." filename) |
| 332 | (let* ((coding-system-for-read 'euc-japan) |
| 333 | (skkbuf(find-file-noselect (expand-file-name filename))) |
| 334 | (buf (get-buffer-create "*skkdic-work*"))) |
| 335 | ;; Setup and generate the header part of working buffer. |
| 336 | (with-current-buffer buf |
| 337 | (erase-buffer) |
| 338 | (buffer-disable-undo) |
| 339 | (insert ";;; ja-dic.el --- dictionary for Japanese input method" |
| 340 | " -*-coding: euc-japan; byte-compile-disable-print-circle:t; -*-\n" |
| 341 | ";;\tGenerated by the command `skkdic-convert'\n" |
| 342 | ";;\tDate: " (current-time-string) "\n" |
| 343 | ";;\tOriginal SKK dictionary file: " |
| 344 | (file-relative-name (expand-file-name filename) dirname) |
| 345 | "\n\n" |
| 346 | ";; This file is part of GNU Emacs.\n\n" |
| 347 | ";;; Commentary:\n\n" |
| 348 | ";; Do byte-compile this file again after any modification.\n\n" |
| 349 | ";;; Start of the header of the original SKK dictionary.\n\n") |
| 350 | (set-buffer skkbuf) |
| 351 | (widen) |
| 352 | (goto-char 1) |
| 353 | (let (pos) |
| 354 | (search-forward ";; okuri-ari") |
| 355 | (forward-line 1) |
| 356 | (setq pos (point)) |
| 357 | (set-buffer buf) |
| 358 | (insert-buffer-substring skkbuf 1 pos)) |
| 359 | (insert "\n" |
| 360 | ";;; Code:\n\n(eval-when-compile (require 'ja-dic-cnv))\n\n") |
| 361 | |
| 362 | ;; Generate the body part of working buffer. |
| 363 | (set-buffer skkbuf) |
| 364 | (let ((from (point)) |
| 365 | to) |
| 366 | ;; Convert okuri-ari entries. |
| 367 | (search-forward ";; okuri-nasi") |
| 368 | (beginning-of-line) |
| 369 | (setq to (point)) |
| 370 | (narrow-to-region from to) |
| 371 | (skkdic-convert-okuri-ari skkbuf buf) |
| 372 | (widen) |
| 373 | |
| 374 | ;; Convert okuri-nasi postfix entries. |
| 375 | (goto-char to) |
| 376 | (forward-line 1) |
| 377 | (setq from (point)) |
| 378 | (re-search-forward "^\\cH") |
| 379 | (setq to (match-beginning 0)) |
| 380 | (narrow-to-region from to) |
| 381 | (skkdic-convert-postfix skkbuf buf) |
| 382 | (widen) |
| 383 | |
| 384 | ;; Convert okuri-nasi prefix entries. |
| 385 | (goto-char to) |
| 386 | (skkdic-convert-prefix skkbuf buf) |
| 387 | |
| 388 | ;; |
| 389 | (skkdic-collect-okuri-nasi) |
| 390 | |
| 391 | ;; Convert okuri-nasi general entries. |
| 392 | (skkdic-convert-okuri-nasi skkbuf buf) |
| 393 | |
| 394 | ;; Postfix |
| 395 | (with-current-buffer buf |
| 396 | (goto-char (point-max)) |
| 397 | (insert ";;\n(provide 'ja-dic)\n\n;;; ja-dic.el ends here\n"))) |
| 398 | |
| 399 | ;; Save the working buffer. |
| 400 | (set-buffer buf) |
| 401 | (set-visited-file-name (expand-file-name ja-dic-filename dirname) t) |
| 402 | (set-buffer-file-coding-system 'euc-japan) |
| 403 | (save-buffer 0)) |
| 404 | (kill-buffer skkbuf) |
| 405 | (switch-to-buffer buf))) |
| 406 | |
| 407 | (defun batch-skkdic-convert () |
| 408 | "Run `skkdic-convert' on the files remaining on the command line. |
| 409 | Use this from the command line, with `-batch'; |
| 410 | it won't work in an interactive Emacs. |
| 411 | For example, invoke: |
| 412 | % emacs -batch -l ja-dic-cnv -f batch-skkdic-convert SKK-JISYO.L |
| 413 | to generate \"ja-dic.el\" from SKK dictionary file \"SKK-JISYO.L\". |
| 414 | To get complete usage, invoke: |
| 415 | % emacs -batch -l ja-dic-cnv -f batch-skkdic-convert -h" |
| 416 | (defvar command-line-args-left) ; Avoid compiler warning. |
| 417 | (if (not noninteractive) |
| 418 | (error "`batch-skkdic-convert' should be used only with -batch")) |
| 419 | (if (string= (car command-line-args-left) "-h") |
| 420 | (progn |
| 421 | (message "To convert SKK-JISYO.L into skkdic.el:") |
| 422 | (message " %% emacs -batch -l ja-dic-cnv -f batch-skkdic-convert SKK-JISYO.L") |
| 423 | (message "To convert SKK-JISYO.L into DIR/ja-dic.el:") |
| 424 | (message " %% emacs -batch -l ja-dic-cnv -f batch-skkdic-convert -dir DIR SKK-JISYO.L")) |
| 425 | (let (targetdir filename) |
| 426 | (if (string= (car command-line-args-left) "-dir") |
| 427 | (progn |
| 428 | (setq command-line-args-left (cdr command-line-args-left)) |
| 429 | (setq targetdir (expand-file-name (car command-line-args-left))) |
| 430 | (setq command-line-args-left (cdr command-line-args-left)))) |
| 431 | (setq filename (expand-file-name (car command-line-args-left))) |
| 432 | (message "Converting %s to %s ..." filename ja-dic-filename) |
| 433 | (message "It takes around 10 minutes even on Sun SS20.") |
| 434 | (skkdic-convert filename targetdir) |
| 435 | (message "Do byte-compile the created file by:") |
| 436 | (message " %% emacs -batch -f batch-byte-compile %s" ja-dic-filename) |
| 437 | )) |
| 438 | (kill-emacs 0)) |
| 439 | |
| 440 | |
| 441 | ;; The following macros are expanded at byte-compiling time so that |
| 442 | ;; compiled code can be loaded quickly. |
| 443 | |
| 444 | (defun skkdic-get-kana-compact-codes (kana) |
| 445 | (let* ((len (length kana)) |
| 446 | (vec (make-vector len 0)) |
| 447 | (i 0) |
| 448 | ch) |
| 449 | (while (< i len) |
| 450 | (setq ch (aref kana i)) |
| 451 | (aset vec i |
| 452 | (if (< ch 128) ; CH is an ASCII letter for OKURIGANA, |
| 453 | (- ch) ; represented by a negative code. |
| 454 | (if (= ch ?\e$B!<\e(B) ; `\e$B!<\e(B' is represented by 0. |
| 455 | 0 |
| 456 | (- (logand (encode-char ch 'japanese-jisx0208) #xFF) 32)))) |
| 457 | (setq i (1+ i))) |
| 458 | vec)) |
| 459 | |
| 460 | (defun skkdic-extract-conversion-data (entry) |
| 461 | (string-match "^\\cj+[a-z]* " entry) |
| 462 | (let ((kana (substring entry (match-beginning 0) (1- (match-end 0)))) |
| 463 | (i (match-end 0)) |
| 464 | candidates) |
| 465 | (while (string-match "[^ ]+" entry i) |
| 466 | (setq candidates (cons (match-string 0 entry) candidates)) |
| 467 | (setq i (match-end 0))) |
| 468 | (cons (skkdic-get-kana-compact-codes kana) candidates))) |
| 469 | |
| 470 | (defmacro skkdic-set-okuri-ari (&rest entries) |
| 471 | `(defconst skkdic-okuri-ari |
| 472 | ',(let ((l entries) |
| 473 | (map '(skkdic-okuri-ari)) |
| 474 | entry) |
| 475 | (while l |
| 476 | (setq entry (skkdic-extract-conversion-data (car l))) |
| 477 | (set-nested-alist (car entry) (cdr entry) map) |
| 478 | (setq l (cdr l))) |
| 479 | map))) |
| 480 | |
| 481 | (defmacro skkdic-set-postfix (&rest entries) |
| 482 | `(defconst skkdic-postfix |
| 483 | ',(let ((l entries) |
| 484 | (map '(nil)) |
| 485 | (longest 1) |
| 486 | len entry) |
| 487 | (while l |
| 488 | (setq entry (skkdic-extract-conversion-data (car l))) |
| 489 | (setq len (length (car entry))) |
| 490 | (if (> len longest) |
| 491 | (setq longest len)) |
| 492 | (let ((entry2 (lookup-nested-alist (car entry) map nil nil t))) |
| 493 | (if (consp (car entry2)) |
| 494 | (let ((conversions (cdr entry))) |
| 495 | (while conversions |
| 496 | (if (not (member (car conversions) (car entry2))) |
| 497 | (setcar entry2 (cons (car conversions) (car entry2)))) |
| 498 | (setq conversions (cdr conversions)))) |
| 499 | (set-nested-alist (car entry) (cdr entry) map))) |
| 500 | (setq l (cdr l))) |
| 501 | (setcar map longest) |
| 502 | map))) |
| 503 | |
| 504 | (defmacro skkdic-set-prefix (&rest entries) |
| 505 | `(defconst skkdic-prefix |
| 506 | ',(let ((l entries) |
| 507 | (map '(nil)) |
| 508 | (longest 1) |
| 509 | len entry) |
| 510 | (while l |
| 511 | (setq entry (skkdic-extract-conversion-data (car l))) |
| 512 | (setq len (length (car entry))) |
| 513 | (if (> len longest) |
| 514 | (setq longest len)) |
| 515 | (let ((entry2 (lookup-nested-alist (car entry) map len nil t))) |
| 516 | (if (consp (car entry2)) |
| 517 | (let ((conversions (cdr entry))) |
| 518 | (while conversions |
| 519 | (if (not (member (car conversions) (car entry2))) |
| 520 | (setcar entry2 (cons (car conversions) (car entry2)))) |
| 521 | (setq conversions (cdr conversions)))) |
| 522 | (set-nested-alist (car entry) (cdr entry) map len))) |
| 523 | (setq l (cdr l))) |
| 524 | (setcar map longest) |
| 525 | map))) |
| 526 | |
| 527 | (defmacro skkdic-set-okuri-nasi (&rest entries) |
| 528 | `(defconst skkdic-okuri-nasi |
| 529 | ',(let ((l entries) |
| 530 | (map '(skdic-okuri-nasi)) |
| 531 | (count 0) |
| 532 | entry) |
| 533 | (while l |
| 534 | (setq count (1+ count)) |
| 535 | (if (= (% count 10000) 0) |
| 536 | (message "%d entries" count)) |
| 537 | (setq entry (skkdic-extract-conversion-data (car l))) |
| 538 | (set-nested-alist (car entry) (cdr entry) map) |
| 539 | (setq l (cdr l))) |
| 540 | map))) |
| 541 | |
| 542 | (provide 'ja-dic-cnv) |
| 543 | |
| 544 | ;; Local Variables: |
| 545 | ;; coding: iso-2022-7bit |
| 546 | ;; End: |
| 547 | |
| 548 | ;; arch-tag: dec06fb0-8118-45b1-80d7-dc360b6fd3b2 |
| 549 | ;;; ja-dic-cnv.el ends here |