Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; ja-dic-utl.el --- utilities for handling Japanese dictionary (SKK-JISYO.L) |
383a3ccf | 2 | |
7976eda0 | 3 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 4 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
2fd125a3 KH |
5 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
6 | ;; Registration Number H14PRO021 | |
383a3ccf | 7 | |
49e64228 | 8 | ;; Keywords: i18n, mule, multilingual, Japanese |
383a3ccf KH |
9 | |
10 | ;; This file is part of GNU Emacs. | |
11 | ||
4936186e | 12 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
383a3ccf | 13 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
14 | ;; the Free Software Foundation, either version 3 of the License, or |
15 | ;; (at your option) any later version. | |
383a3ccf KH |
16 | |
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
383a3ccf KH |
24 | |
25 | ;;; Commentary: | |
26 | ||
27 | ;; This file provides a generic function to look up a Japanese | |
28 | ;; dictionary of SKK format. | |
29 | ;; | |
30 | ;; SKK is a free Japanese input method running on Mule created by | |
31 | ;; Masahiko Sato <masahiko@sato.riec.tohoku.ac.jp>. The Emacs Lisp | |
32 | ;; library kkc.el provides a facility to convert a Japanese kana | |
33 | ;; string to a kanji-kana-mixed string by using SKK's dictionary. | |
34 | ;; | |
35 | ;; The original SKK dictionary SKK-JISYO.L is converted to ja-dic.el | |
36 | ;; by ja-dic-cnv.el. We get entries of the dictionary in four | |
37 | ;; variables (listed below) by loading that file (or byte-compiled | |
38 | ;; version ja-dic.elc). | |
39 | ||
40 | ;;; Code: | |
41 | ||
;; The following four variables are set by loading ja-dic.el[c].
(defvar skkdic-okuri-ari nil
  "Nested alist for OKURI-ARI entries of SKK dictionary.
`skkdic-lookup-key' searches it with a key whose last element is
the negated ASCII letter found in `skkdic-okurigana-table'.")

(defvar skkdic-postfix nil
  "Nested alist for SETSUBIJI (postfix) entries of SKK dictionary.
`skkdic-lookup-key' reads the car of this alist as a number to
decide where to start splitting off a postfix.")

(defvar skkdic-prefix nil
  "Nested alist for SETTOUJI (prefix) entries of SKK dictionary.
`skkdic-lookup-key' reads the car of this alist as a number to
decide where to start splitting off a prefix.")

(defvar skkdic-okuri-nasi nil
  "Nested alist for OKURI-NASI entries of SKK dictionary.
While this is nil, `skkdic-lookup-key' tries to load the library
\"ja-dic/ja-dic\" before looking anything up.")
54 | ||
(defconst skkdic-okurigana-table
  '(;; Entries mapped to a vowel letter.
    (?\e$B$!\e(B . ?a) (?\e$B$"\e(B . ?a)
    (?\e$B$#\e(B . ?i) (?\e$B$$\e(B . ?i)
    (?\e$B$%\e(B . ?u) (?\e$B$&\e(B . ?u)
    (?\e$B$'\e(B . ?e) (?\e$B$(\e(B . ?e)
    (?\e$B$)\e(B . ?o) (?\e$B$*\e(B . ?o)
    ;; Entries mapped to `k' or `g'.
    (?\e$B$+\e(B . ?k) (?\e$B$,\e(B . ?g)
    (?\e$B$-\e(B . ?k) (?\e$B$.\e(B . ?g)
    (?\e$B$/\e(B . ?k) (?\e$B$0\e(B . ?g)
    (?\e$B$1\e(B . ?k) (?\e$B$2\e(B . ?g)
    (?\e$B$3\e(B . ?k) (?\e$B$4\e(B . ?g)
    ;; Entries mapped to `s', `z' or `j'.
    (?\e$B$5\e(B . ?s) (?\e$B$6\e(B . ?z)
    (?\e$B$7\e(B . ?s) (?\e$B$8\e(B . ?j)
    (?\e$B$9\e(B . ?s) (?\e$B$:\e(B . ?z)
    (?\e$B$;\e(B . ?s) (?\e$B$<\e(B . ?z)
    (?\e$B$=\e(B . ?s) (?\e$B$>\e(B . ?z)
    ;; Entries mapped to `t' or `d'.
    (?\e$B$?\e(B . ?t) (?\e$B$@\e(B . ?d)
    (?\e$B$A\e(B . ?t) (?\e$B$B\e(B . ?d)
    (?\e$B$C\e(B . ?t) (?\e$B$D\e(B . ?t) (?\e$B$E\e(B . ?d)
    (?\e$B$F\e(B . ?t) (?\e$B$G\e(B . ?d)
    (?\e$B$H\e(B . ?t) (?\e$B$I\e(B . ?d)
    ;; Entries mapped to `n'.
    (?\e$B$J\e(B . ?n) (?\e$B$K\e(B . ?n) (?\e$B$L\e(B . ?n) (?\e$B$M\e(B . ?n) (?\e$B$N\e(B . ?n)
    ;; Entries mapped to `h', `b' or `p'.
    (?\e$B$O\e(B . ?h) (?\e$B$P\e(B . ?b) (?\e$B$Q\e(B . ?p)
    (?\e$B$R\e(B . ?h) (?\e$B$S\e(B . ?b) (?\e$B$T\e(B . ?p)
    (?\e$B$U\e(B . ?h) (?\e$B$V\e(B . ?b) (?\e$B$W\e(B . ?p)
    (?\e$B$X\e(B . ?h) (?\e$B$Y\e(B . ?b) (?\e$B$Z\e(B . ?p)
    (?\e$B$[\e(B . ?h) (?\e$B$\\e(B . ?b) (?\e$B$]\e(B . ?p)
    ;; Entries mapped to `m'.
    (?\e$B$^\e(B . ?m) (?\e$B$_\e(B . ?m) (?\e$B$`\e(B . ?m) (?\e$B$a\e(B . ?m) (?\e$B$b\e(B . ?m)
    ;; Entries mapped to `y'.
    (?\e$B$c\e(B . ?y) (?\e$B$d\e(B . ?y) (?\e$B$e\e(B . ?y)
    (?\e$B$f\e(B . ?y) (?\e$B$g\e(B . ?y) (?\e$B$h\e(B . ?y)
    ;; Entries mapped to `r'.
    (?\e$B$i\e(B . ?r) (?\e$B$j\e(B . ?r) (?\e$B$k\e(B . ?r) (?\e$B$l\e(B . ?r) (?\e$B$m\e(B . ?r)
    ;; Entries mapped to `w'.
    (?\e$B$o\e(B . ?w) (?\e$B$p\e(B . ?w) (?\e$B$q\e(B . ?w) (?\e$B$r\e(B . ?w)
    ;; Final entry, mapped to `n'.
    (?\e$B$s\e(B . ?n))
  "Alist of okurigana characters vs trailing ASCII letters in OKURI-ARI entry.
Each element is (CHAR . LETTER).  `skkdic-lookup-key' looks up the
last character of its input here with `assq' and uses the negated
LETTER as the last key element when searching `skkdic-okuri-ari'.")
75 | ||
(defun skkdic-merge-head-and-tail (heads tails postfix)
  "Return the list of strings made by concatenating HEADS with TAILS.
HEADS and TAILS are lists of strings.  Every usable head is joined
with every usable tail.

If POSTFIX is non-nil, heads shorter than two characters are
skipped and every tail is used.  If POSTFIX is nil, every head is
used and tails shorter than two characters are skipped.

The result is built by pushing, so it is in the reverse of the
order in which the combinations were generated.  Return nil if no
combination qualifies."
  (let ((min-len 2)
        merged)
    (dolist (head heads)
      (when (or (not postfix)
                (>= (length head) min-len))
        (dolist (tail tails)
          (when (or postfix
                    (>= (length tail) min-len))
            (push (concat head tail) merged)))))
    merged))
90 | ||
073d10a1 KH |
(defconst skkdic-jisx0208-hiragana-block
  (let ((block-start (decode-char 'japanese-jisx0208 #x2421))
        (block-end (decode-char 'japanese-jisx0208 #x247E)))
    (cons block-start block-end))
  "Cons (START . END) of characters delimiting a `japanese-jisx0208' range.
START and END are the characters decoded from code points #x2421
and #x247E of that charset.  `skkdic-lookup-key' treats a character
between START and END, inclusive, as a Hiragana character.")
383a3ccf KH |
94 | |
(defun skkdic-lookup-key (seq len &optional postfix prefer-noun)
  "Return a list of conversion strings for sequence SEQ of length LEN.

SEQ is a vector of Kana characters to be converted by SKK dictionary.
If LEN is shorter than the length of SEQ, only the first LEN keys in
SEQ are taken into account.  If LEN is zero, the okurigana lookup is
skipped.

Optional 3rd arg POSTFIX non-nil means SETSUBIJI (postfix) are also
considered to find conversion strings.

Optional 4th arg PREFER-NOUN non-nil means that the conversions
without okurigana are placed at the head of the returned list.

If `skkdic-okuri-nasi' is nil, load the library \"ja-dic/ja-dic\"
first.  If loading fails, display an explanation in the *Help*
buffer and signal the original error again."
  (unless skkdic-okuri-nasi
    (condition-case err
        (load-library "ja-dic/ja-dic")
      (error (ding)
             (with-output-to-temp-buffer "*Help*"
               (princ "The library `ja-dic' can't be loaded.

The most common case is that you have not yet installed the library
included in LEIM (Libraries of Emacs Input Method) which is
distributed separately from Emacs.

LEIM is available from the same ftp directory as Emacs."))
             (signal (car err) (cdr err)))))

  (let ((vec (make-vector len 0))
        entry)
    ;; At first, generate vector VEC from SEQ for looking up SKK
    ;; alists.  Nth element in VEC corresponds to Nth element in SEQ.
    ;; The values are decided as follows.
    ;;   If SEQ[N] is `\e$B!<\e(B', VEC[N] is 0,
    ;;   else if SEQ[N] is a Hiragana character, VEC[N] is:
    ;;     ((The 2nd position code of SEQ[N]) - 32),
    ;;   else VEC[N] is 128.
    (dotimes (i len)
      (let ((ch (aref seq i)))
        (aset vec i
              (cond ((= ch ?\e$B!<\e(B) 0)
                    ((and (>= ch (car skkdic-jisx0208-hiragana-block))
                          (<= ch (cdr skkdic-jisx0208-hiragana-block)))
                     (let ((code (encode-char ch 'japanese-jisx0208)))
                       ;; A character that cannot be encoded in
                       ;; japanese-jisx0208 is treated as non-Hiragana.
                       (if code
                           (- (logand code #xFF) 32)
                         128)))
                    (t 128)))))

    ;; Search OKURI-NASI entries.
    (let ((found (lookup-nested-alist vec skkdic-okuri-nasi len 0 t)))
      ;; Copy the list, because ENTRY is extended destructively with
      ;; `nconc' below and the dictionary itself must stay intact.
      (setq entry (and (consp (car found))
                       (copy-sequence (car found)))))

    (when postfix
      ;; Search OKURI-NASI entries with postfixes.  SPLIT is the index
      ;; in VEC where the head ends and the postfix begins.
      (let ((split (max (- len (car skkdic-postfix)) 1)))
        (while (< split len)
          (let* ((head (lookup-nested-alist vec skkdic-okuri-nasi
                                            split 0 t))
                 (tail (and (consp (car head))
                            (lookup-nested-alist vec skkdic-postfix
                                                 len split t)))
                 (merged (and (consp (car tail))
                              (skkdic-merge-head-and-tail
                               (car head) (car tail) t))))
            (when merged
              ;; `nconc' returns MERGED itself when ENTRY is nil.
              (setq entry (nconc entry merged))))
          (setq split (1+ split)))))

    ;; Search OKURI-NASI entries with prefixes.  SPLIT is the index in
    ;; VEC where the prefix ends and the tail begins; longer prefixes
    ;; are tried first.
    (let ((split (min (car skkdic-prefix) (- len 2))))
      (while (> split 0)
        (let* ((head (lookup-nested-alist vec skkdic-prefix split 0 t))
               (tail (and (consp (car head))
                          (lookup-nested-alist vec skkdic-okuri-nasi
                                               len split t)))
               (merged (and (consp (car tail))
                            (skkdic-merge-head-and-tail
                             (car head) (car tail) nil))))
          (when merged
            (setq entry (nconc entry merged))))
        (setq split (1- split))))

    ;; Search OKURI-ARI entries.  This needs the last character of the
    ;; input, so there is nothing to do when LEN is zero.
    (when (> len 0)
      (let* ((last-char (aref seq (1- len)))
             (okurigana (assq last-char skkdic-okurigana-table)))
        (when okurigana
          ;; The last key element of an OKURI-ARI entry is the negated
          ;; ASCII letter corresponding to the okurigana.  VEC is not
          ;; used after this lookup, so it is modified in place.
          (aset vec (1- len) (- (cdr okurigana)))
          (let ((found (lookup-nested-alist vec skkdic-okuri-ari len 0 t)))
            (when (consp (car found))
              (let* ((okuri (char-to-string last-char))
                     ;; Fresh list of the conversions with the
                     ;; okurigana appended to each of them.
                     (candidates (mapcar (lambda (str) (concat str okuri))
                                         (car found))))
                (setq entry
                      (if (and entry prefer-noun)
                          ;; Keep conversions without okurigana first.
                          (nconc entry candidates)
                        ;; Otherwise the reversed okurigana
                        ;; conversions come first (also when there is
                        ;; no other conversion at all).
                        (nconc (nreverse candidates) entry)))))))))

    entry))
217 | ||
218 | ;; | |
219 | (provide 'ja-dic-utl) | |
220 | ||
8dad09ab KH |
221 | ;; Local Variables: |
222 | ;; coding: iso-2022-7bit | |
223 | ;; End: | |
60370d40 PJ |
224 | |
225 | ;;; ja-dic-utl.el ends here |