Commit | Line | Data |
---|---|---|
75b62c63 KH |
1 | ;;; knd-util.el --- Support for composing Kannada characters |
2 | ||
38141d20 GM |
3 | ;; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 |
4 | ;; Free Software Foundation, Inc. | |
75b62c63 KH |
5 | |
6 | ;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org> | |
7 | ;; Keywords: multilingual, Kannada | |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published by | |
b4aa6026 | 13 | ;; the Free Software Foundation; either version 3, or (at your option) |
75b62c63 KH |
14 | ;; any later version. |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
23 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
24 | ;; Boston, MA 02110-1301, USA. | |
75b62c63 KH |
25 | |
26 | ;; Created: Jul. 14. 2003 | |
27 | ||
28 | ;;; Commentary: | |
29 | ||
30 | ;; This file provides character(Unicode) to glyph(CDAC) conversion and | |
31 | ;; composition of Kannada script characters. | |
32 | ||
33 | ;;; Code: | |
34 | ||
35 | ;;;###autoload | |
36 | ||
37 | ;; Kannada Composable Pattern | |
38 | ;; C .. Consonants | |
39 | ;; V .. Vowel | |
40 | ;; H .. Virama | |
41 | ;; M .. Matra | |
42 | ;; A .. Anusvara (the optional vowel modifier in pattern 1) |
43 | ;; (N .. Zerowidth Non Joiner) | |
44 | ;; (J .. Zerowidth Joiner. ) | |
45 | ;; 1. vowel | |
46 | ;; V(A)? | |
47 | ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) | |
48 | ;; ((CH)?(CH)?(CH)?CH)?C(H|M?)? | |
49 | ||
(defconst kannada-consonant
  "[\e$,1>u\e(B-\e$,1?9\e(B]"
  "Regexp character class matching one Kannada consonant.")
52 | ||
(defconst kannada-consonant-needs-twirl
  "[\e$,1>u>w\e(B-\e$,1>{>}\e(B-\e$,1>~? \e(B-\e$,1?"?$\e(B-\e$,1?+?-?0?3\e(B-\e$,1?9\e(B]\\(\e$,1?M\e(B[\e$,1>u\e(B-\e$,1?9\e(B]\\)*[\e$,1?A?B?C?D>b\e(B]?$"
  "Regexp deciding whether a syllable gets an extra trailing glyph.
`kannada-compose-syllable-region' searches for this pattern when the
syllable does not end in a virama and, on a match, appends one more
glyph to the glyph string.")
55 | ||
(defconst kannada-composable-pattern
  (concat
   ;; Alternative 1: a single character from the first class, or the
   ;; single character in the second class.
   "\\([\e$,1>b\e(B-\e$,1>t?`>l\e(B]\\)\\|[\e$,1>c\e(B]"
   ;; Alternative 2: a syllable.
   "\\|\\("
   ;; Up to four leading consonant+virama pairs ...
   "\\(?:\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?"
   ;; ... then the final consonant with an optional virama or sign.
   "[\e$,1>u\e(B-\e$,1?9\e(B]\\(?:\e$,1?M\e(B\\|[\e$,1?>\e(B-\e$,1?M?U?C\e(B]?\\)?"
   "\\)")
  "Regexp matching a composable sequence of Kannada characters.")
64 | ||
d63b2da8 | 65 | ;;;###autoload |
75b62c63 KH |
(defun kannada-compose-region (from to)
  "Compose each Kannada syllable found between FROM and TO.
The region is searched for `kannada-composable-pattern' and every
match is passed to `kannada-compose-syllable-region'.
Interactively, operate on the active region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (start end)
        (while (re-search-forward kannada-composable-pattern nil t)
          (setq start (match-beginning 0)
                end (match-end 0))
          (kannada-compose-syllable-region start end))))))
d63b2da8 | 75 | ;;;###autoload |
75b62c63 KH |
(defun kannada-compose-string (string)
  "Return STRING with its Kannada syllables composed.
STRING is first passed through `decompose-string', inserted into a
temporary buffer and composed there with `kannada-compose-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-region (point-min) (point-max))
      (buffer-string))))
81 | ||
d63b2da8 | 82 | ;;;###autoload |
75b62c63 KH |
(defun kannada-post-read-conversion (len)
  "Compose Kannada text in the LEN characters following point.
The buffer's modified flag is restored to its previous value after
composing.  Return the size of the processed region."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p)))
        (narrow-to-region (point) (+ (point) len))
        (kannada-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
91 | ||
(defun kannada-range (from to)
  "Return the ascending list of integers FROM, FROM+1, ..., TO.
Return nil when FROM is greater than TO."
  (let ((acc nil)
        (n to))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq acc (cons n acc)
            n (1- n)))
    acc))
96 | ||
(defun kannada-regexp-of-hashtbl-keys (hashtbl)
  "Return a regexp that matches any key of the hash table HASHTBL.
The keys are collected, ordered longest first and handed to
`regexp-opt'."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _val) (setq keys (cons key keys))) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
106 | ||
(defun kannada-regexp-of-hashtbl-vals (hashtbl)
  "Return a regexp that matches any value of the hash table HASHTBL.
The values are collected, ordered longest first and handed to
`regexp-opt'."
  (let ((max-specpdl-size 1000)
        (vals nil))
    (maphash (lambda (_key val) (setq vals (cons val vals))) hashtbl)
    (regexp-opt
     (sort vals (lambda (a b) (> (length a) (length b)))))))
116 | ||
(defun kannada-composition-function (from to pattern &optional string)
  "Compose Kannada characters in the region FROM to TO, or in STRING.
When STRING is non-nil it is composed instead of the region.  The
region or STRING is assumed to match the composable PATTERN regexp
in full; PATTERN itself is not consulted here.
Return the length TO minus FROM."
  (if (null string)
      (kannada-compose-syllable-region from to)
    (kannada-compose-syllable-string string))
  (- to from))
124 | ||
125 | ;; Register a function to compose Kannada characters. | |
;; Every code point from U+0C80 through U+0CFF gets its own fresh
;; (PATTERN . FUNCTION) entry in `composition-function-table'.
(dolist (ucs (kannada-range #x0c80 #x0cff))
  (aset composition-function-table (decode-char 'ucs ucs)
        (list (cons kannada-composable-pattern
                    'kannada-composition-function))))
132 | ||
133 | ;; Notes on conversion steps. | |
134 | ||
135 | ;; 1. chars to glyphs | |
136 | ;; | |
137 | ;; Rules are not applied to a virama appearing at the end of the |
138 | ;; text. Also, the preceding/following "r" is treated as a special case. |
139 | ||
140 | ;; 2. glyphs reordering. | |
141 | ;; | |
142 | ;; The glyphs are split by virama, and each glyph group is |
143 | ;; re-ordered in the following order. | |
144 | ;; | |
145 | ;; Note that `consonant-glyph' mentioned here does not contain the | |
146 | ;; vertical bar (right modifier) attached at the right of the | |
147 | ;; consonant. | |
148 | ;; | |
149 | ;; If the glyph-group contains right modifier, | |
150 | ;; (1) consonant-glyphs/vowels | |
151 | ;; (2) spacing | |
152 | ;; (3) right modifier (may be matra) | |
153 | ;; (4) top matra | |
154 | ;; (5) preceding "r" | |
155 | ;; (7) following "r" | |
156 | ;; (8) bottom matra or virama. | |
157 | ;; | |
158 | ;; Otherwise, | |
159 | ;; (1) consonant-glyph/vowels, with nukta sign | |
160 | ;; (3) left matra | |
161 | ;; (4) top matra | |
162 | ;; (5) preceding "r" | |
163 | ;; (7) following "r" | |
164 | ;; (8) bottom matra or virama. | |
165 | ;; (2) spacing | |
166 | ||
167 | ;; 3. glyph to glyph | |
168 | ;; | |
169 | ;; For better display, some glyph sequences are replaced by tuned glyphs. |
170 | ||
171 | ;; 4. Composition. | |
172 | ;; | |
173 | ;; left modifiers will be attached at the left. | |
174 | ;; others will be attached right. | |
175 | ||
176 | ;; Problem:: | |
177 | ;; Can we generalize this method to other Indian scripts? |
178 | ||
(defvar knd-char-glyph
  '(("\e$,1>e\e(B" . "\e$,43@\e(B")
    ("\e$,1>f\e(B" . "\e$,43A\e(B")
    ("\e$,1?>\e(B" . "\e$,44{\e(B")
    ("\e$,1>g\e(B" . "\e$,43B\e(B")
    ("\e$,1??\e(B" . nil)
    ("\e$,1>h\e(B" . "\e$,43C\e(B")
    ("\e$,1?@\e(B" . nil)
    ("\e$,1>i\e(B" . "\e$,43D\e(B")
    ("\e$,1?A\e(B" . "\\e$,44\7f\e(B")
    ("\e$,1>j\e(B" . "\e$,43E\e(B")
    ("\e$,1?B\e(B" . "\\e$,45 \e(B")
    ("\e$,1>k\e(B" . "\e$,43F4\7f\e(B")
    ("\e$,1?C\e(B" . "\\e$,45$\e(B")
    ("\e$,1?`\e(B" . "\e$,43F5 \e(B")
    ("\e$,1?D\e(B" . "\\e$,45%\e(B")
    ;;("\e$,1>l\e(B" . nil) ; not implemented.
    ;;("\e$,1?a\e(B" . nil)
    ("\e$,1>n\e(B" . "\e$,43G\e(B")
    ("\e$,1>o\e(B" . "\e$,43H\e(B")
    ("\e$,1>p\e(B" . "\e$,43I\e(B")
    ("\e$,1?F\e(B" . "\\e$,45&\e(B")
    ("\e$,1?G\e(B" . "\\e$,45&4~\e(B")
    ("\e$,1?H\e(B" . "\\e$,45&5'\e(B")
    ("\e$,1>r\e(B" . "\e$,43J\e(B")
    ("\e$,1?J\e(B" . "\e$,45&5 \e(B")
    ("\e$,1>s\e(B" . "\e$,43K\e(B")
    ("\e$,1?K\e(B" . "\\e$,45&5 4~\e(B")
    ("\e$,1>t\e(B" . "\e$,43L\e(B")
    ("\e$,1?L\e(B" . "\\e$,45(\e(B")
    ("\e$,1>b\e(B" . "\e$,43M\e(B")
    ("\e$,1>c\e(B" . "\e$,43N\e(B")
    ;; One row per consonant: consonant+virama, bare consonant, then
    ;; the consonant+matra combinations that have dedicated glyphs.
    ("\e$,1>u?M\e(B" . "\e$,43O5)\e(B") ("\e$,1>u\e(B" . "\e$,43O\e(B") ("\e$,1>u??\e(B" . "\e$,43P\e(B") ("\e$,1>u?@\e(B" . "\e$,43P4~\e(B")
    ("\e$,1>v?M\e(B" . "\e$,43S5)\e(B") ("\e$,1>v\e(B" . "\e$,43S\e(B") ("\e$,1>v??\e(B" . "\e$,43T\e(B") ("\e$,1>v?@\e(B" . "\e$,43T4~\e(B") ("\e$,1>v?F\e(B" . "\e$,43S5&\e(B") ("\e$,1>v?G\e(B" . "\e$,43S5&4~\e(B") ("\e$,1>v?H\e(B" . "\e$,43S5&5'\e(B") ("\e$,1>v?J\e(B" . "\e$,43S5&5&5 \e(B") ("\e$,1>v?K\e(B" . "\e$,43S5&5&5 4~\e(B") ("\e$,1>v?L\e(B" . "\e$,43S5(\e(B")
    ("\e$,1>w?M\e(B" . "\e$,43V5)\e(B") ("\e$,1>w\e(B" . "\e$,43V\e(B") ("\e$,1>w??\e(B" . "\e$,43W\e(B") ("\e$,1>w?@\e(B" . "\e$,43W4~\e(B")
    ("\e$,1>x?M\e(B" . "\e$,43Y5)\e(B") ("\e$,1>x\e(B" . "\e$,43Y\e(B") ("\e$,1>x??\e(B" . "\e$,43Z\e(B") ("\e$,1>x?@\e(B" . "\e$,43Z4~\e(B")
    ("\e$,1>y?M\e(B" . "\e$,43\5)\e(B") ("\e$,1>y\e(B" . "\e$,43\\e(B")
    ("\e$,1>z?M\e(B" . "\e$,43^5)\e(B") ("\e$,1>z\e(B" . "\e$,43^\e(B") ("\e$,1>z??\e(B" . "\e$,43_\e(B") ("\e$,1>z?@\e(B" . "\e$,43_4~\e(B")
    ("\e$,1>{?M\e(B" . "\e$,43a5)\e(B") ("\e$,1>{\e(B" . "\e$,43a\e(B") ("\e$,1>{??\e(B" . "\e$,43b\e(B") ("\e$,1>{?@\e(B" . "\e$,43b4~\e(B")
    ("\e$,1>|?M\e(B" . "\e$,43d5)\e(B") ("\e$,1>|\e(B" . "\e$,43d\e(B") ("\e$,1>|??\e(B" . "\e$,43f\e(B") ("\e$,1>|?@\e(B" . "\e$,43f4~\e(B") ("\e$,1>|?F\e(B" . "\e$,43e5&\e(B") ("\e$,1>|?G\e(B" . "\e$,43e5&4~\e(B") ("\e$,1>|?H\e(B" . "\e$,43e5&5'\e(B") ("\e$,1>|?J\e(B" . "\e$,43e5&5&5 \e(B") ("\e$,1>|?K\e(B" . "\e$,43e5&5&5 4~\e(B") ("\e$,1>|?L\e(B" . "\e$,43e5(\e(B")
    ("\e$,1>}?M\e(B" . "\e$,44a4z3h4\7f5)\e(B") ("\e$,1>}\e(B" . "\e$,44a4z3h4\7f\e(B") ("\e$,1>}??\e(B" . "\e$,44b3h4\7f\e(B") ("\e$,1>}?@\e(B" . "\e$,44b3h4\7f4~\e(B") ("\e$,1>}?B\e(B". "\e$,44a4z3h5 \e(B") ("\e$,1>}?J\e(B". "\e$,44a5&3h5 \e(B") ("\e$,1>}?K\e(B". "\e$,44a5&3h5 4~\e(B")
    ("\e$,1>~?M\e(B" . "\e$,43j5)\e(B") ("\e$,1>~\e(B" . "\e$,43j\e(B")
    ;; The aa-matra entry in this row is keyed by this row's own
    ;; consonant, like the corresponding entries of the other rows.
    ("\e$,1>\7f?M\e(B" . "\e$,43m5)\e(B") ("\e$,1>\7f\e(B" . "\e$,43l\e(B") ("\e$,1>\7f?>\e(B" . "\e$,43m4{\e(B") ("\e$,1>\7f??\e(B" . "\e$,43n\e(B") ("\e$,1>\7f?@\e(B" . "\e$,43n4~\e(B") ("\e$,1>\7f?F\e(B" . "\e$,43m5&\e(B") ("\e$,1>\7f?G\e(B" . "\e$,43m5&4~\e(B") ("\e$,1>\7f?H\e(B" . "\e$,43m5&5'\e(B") ("\e$,1>\7f?J\e(B" . "\e$,43m5&5&5 \e(B") ("\e$,1>\7f?K\e(B" . "\e$,43m5&5&5 4~\e(B") ("\e$,1>\7f?L\e(B" . "\e$,43m5(\e(B")
    ("\e$,1? ?M\e(B" . "\e$,43p5)\e(B") ("\e$,1? \e(B" . "\e$,43p\e(B") ("\e$,1? ??\e(B" . "\e$,43q\e(B") ("\e$,1? ?@\e(B" . "\e$,43q4~\e(B")
    ("\e$,1?!?M\e(B" . "\e$,43s5)\e(B") ("\e$,1?!\e(B" . "\e$,43s\e(B") ("\e$,1?!??\e(B" . "\e$,43t\e(B") ("\e$,1?!?@\e(B" . "\e$,43t4~\e(B")
    ("\e$,1?"?M\e(B" . "\e$,43v5)\e(B") ("\e$,1?"\e(B" . "\e$,43v\e(B") ("\e$,1?"??\e(B" . "\e$,43w\e(B") ("\e$,1?"?@\e(B" . "\e$,43w4~\e(B")
    ("\e$,1?#?M\e(B" . "\e$,43z5)\e(B") ("\e$,1?#\e(B" . "\e$,43y\e(B") ("\e$,1?#?>\e(B" . "\e$,43z4{\e(B") ("\e$,1?#??\e(B" . "\e$,43{\e(B") ("\e$,1?#?@\e(B" . "\e$,43{4~\e(B") ("\e$,1?#?F\e(B" . "\e$,43z5&\e(B") ("\e$,1?#?G\e(B" . "\e$,43z5&4~\e(B") ("\e$,1?#?H\e(B" . "\e$,43z5&5'\e(B") ("\e$,1?#?J\e(B" . "\e$,43z5&5&5 \e(B") ("\e$,1?#?K\e(B" . "\e$,43z5&5&5 4~\e(B") ("\e$,1?#?L\e(B" . "\e$,43z5(\e(B")
    ("\e$,1?$?M\e(B" . "\e$,43}5)\e(B") ("\e$,1?$\e(B" . "\e$,43}\e(B") ("\e$,1?$??\e(B" . "\e$,43~\e(B") ("\e$,1?$?@\e(B" . "\e$,43~4~\e(B")
    ("\e$,1?%?M\e(B" . "\e$,44B5)\e(B") ("\e$,1?%\e(B" . "\e$,44B\e(B") ("\e$,1?%??\e(B" . "\e$,44C\e(B") ("\e$,1?%?@\e(B" . "\e$,44C4~\e(B")
    ("\e$,1?&?M\e(B" . "\e$,44E5)\e(B") ("\e$,1?&\e(B" . "\e$,44E\e(B") ("\e$,1?&??\e(B" . "\e$,44F\e(B") ("\e$,1?&?@\e(B" . "\e$,44F4~\e(B")
    ("\e$,1?'?M\e(B" . "\e$,44H5)\e(B") ("\e$,1?'\e(B" . "\e$,44H\e(B") ("\e$,1?'??\e(B" . "\e$,44I\e(B") ("\e$,1?'?@\e(B" . "\e$,44I4~\e(B")
    ("\e$,1?(?M\e(B" . "\e$,44K5)\e(B") ("\e$,1?(\e(B" . "\e$,44K\e(B") ("\e$,1?(??\e(B" . "\e$,44L\e(B") ("\e$,1?(?@\e(B" . "\e$,44L4~\e(B")
    ("\e$,1?*?M\e(B" . "\e$,44N5)\e(B") ("\e$,1?*\e(B" . "\e$,44N\e(B") ("\e$,1?*??\e(B" . "\e$,44O\e(B") ("\e$,1?*?@\e(B" . "\e$,44O4~\e(B") ("\e$,1?*?A\e(B" . "\e$,44N5"\e(B") ("\e$,1?*?B\e(B" . "\e$,44N5#\e(B") ("\e$,1?*?J\e(B" . "\e$,44N5&5#\e(B") ("\e$,1?*?K\e(B" . "\e$,44N5&5#4~\e(B")
    ("\e$,1?+?M\e(B" . "\e$,44Q5)\e(B") ("\e$,1?+\e(B" . "\e$,44Q\e(B") ("\e$,1?+??\e(B" . "\e$,44R\e(B") ("\e$,1?+?@\e(B" . "\e$,44R4~\e(B") ("\e$,1?+?A\e(B" . "\e$,44Q5"\e(B") ("\e$,1?+?B\e(B" . "\e$,44Q5#\e(B") ("\e$,1?+?J\e(B" . "\e$,44Q5&5#\e(B") ("\e$,1?+?K\e(B" . "\e$,44Q5&5#4~\e(B")
    ("\e$,1?,?M\e(B" . "\e$,44W5)\e(B") ("\e$,1?,\e(B" . "\e$,44V\e(B") ("\e$,1?,?>\e(B". "\e$,44W4{\e(B") ("\e$,1?,??\e(B" . "\e$,44X\e(B") ("\e$,1?,?@\e(B" . "\e$,44X4~\e(B") ("\e$,1?,?F\e(B" . "\e$,44W5&\e(B") ("\e$,1?,?G\e(B" . "\e$,44W5&4~\e(B") ("\e$,1?,?H\e(B" . "\e$,44W5&5'\e(B") ("\e$,1?,?J\e(B" . "\e$,44W5&5&5 \e(B") ("\e$,1?,?K\e(B" . "\e$,44W5&5&5 4~\e(B") ("\e$,1?,?L\e(B" . "\e$,44W5(\e(B")
    ("\e$,1?-?M\e(B" . "\e$,44Z5)\e(B") ("\e$,1?-\e(B" . "\e$,44Z\e(B") ("\e$,1?-??\e(B" . "\e$,44[\e(B") ("\e$,1?-?@\e(B" . "\e$,44[4~\e(B")
    ("\e$,1?.?M\e(B" . "\e$,44h5!5)\e(B") ("\e$,1?.\e(B" . "\e$,44h4z4\7f\e(B") ("\e$,1?.?>\e(B" . "\e$,44h4z5!4{\e(B") ("\e$,1?.??\e(B" . "\e$,44i4\7f\e(B") ("\e$,1?.?@\e(B" . "\e$,44i4\7f4~\e(B") ("\e$,1?.?J\e(B". "\e$,44h5&5 \e(B") ("\e$,1?.?K\e(B". "\e$,44h5&5 4~\e(B")
    ("\e$,1?/?M\e(B" . "\e$,44^4z5!5)\e(B") ("\e$,1?/\e(B" . "\e$,44^4z4\7f\e(B") ("\e$,1?/?>\e(B" . "\e$,44^4z5!4{\e(B")("\e$,1?/??\e(B" . "\e$,44_4\7f\e(B") ("\e$,1?/?@\e(B" . "\e$,44_4\7f4~\e(B") ("\e$,1?/?J\e(B" . "\e$,44^5&5 \e(B") ("\e$,1?/?K\e(B" . "\e$,44^5&5 4~\e(B")
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ;; NOTE(review): the next row repeats the previous one verbatim;
    ;; possibly it was meant for a different consonant -- confirm.
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ("\e$,1?2?M\e(B" . "\e$,44e5)\e(B") ("\e$,1?2\e(B" . "\e$,44d\e(B") ("\e$,1?2?>\e(B" . "\e$,44e4{\e(B") ("\e$,1?2??\e(B" . "\e$,44f\e(B") ("\e$,1?2?@\e(B" . "\e$,44f4~\e(B") ("\e$,1?2?F\e(B" . "\e$,44e5&\e(B") ("\e$,1?2?G\e(B" . "\e$,44e5&4~\e(B") ("\e$,1?2?H\e(B" . "\e$,44e5&5'\e(B") ("\e$,1?2?J\e(B" . "\e$,44e5&5&5 \e(B") ("\e$,1?2?K\e(B" . "\e$,44e5&5&5 4~\e(B") ("\e$,1?2?L\e(B" . "\e$,44e5(\e(B")
    ("\e$,1?5?M\e(B" . "\e$,44h5)\e(B") ("\e$,1?5\e(B" . "\e$,44h\e(B") ("\e$,1?5??\e(B" . "\e$,44i\e(B") ("\e$,1?5?@\e(B" . "\e$,44i4~\e(B") ("\e$,1?5?A\e(B" . "\e$,44h5"\e(B") ("\e$,1?5?B\e(B" . "\e$,44h5#\e(B") ("\e$,1?5?J\e(B" . "\e$,44h5&5#\e(B") ("\e$,1?5?K\e(B" . "\e$,44h5&5#4~\e(B")
    ("\e$,1?6?M\e(B" . "\e$,44k5)\e(B") ("\e$,1?6\e(B" . "\e$,44k\e(B") ("\e$,1?6??\e(B" . "\e$,44l\e(B") ("\e$,1?6?@\e(B" . "\e$,44l4~\e(B")
    ("\e$,1?7?M\e(B" . "\e$,44n5)\e(B") ("\e$,1?7\e(B" . "\e$,44n\e(B") ("\e$,1?7??\e(B" . "\e$,44o\e(B") ("\e$,1?7?@\e(B" . "\e$,44o4~\e(B")
    ("\e$,1?8?M\e(B" . "\e$,44q5)\e(B") ("\e$,1?8\e(B" . "\e$,44q\e(B") ("\e$,1?8??\e(B" . "\e$,44r\e(B") ("\e$,1?8?@\e(B" . "\e$,44r4~\e(B")
    ("\e$,1?9?M\e(B" . "\e$,44t5)\e(B") ("\e$,1?9\e(B" . "\e$,44t\e(B") ("\e$,1?9??\e(B" . "\e$,44u\e(B") ("\e$,1?9?@\e(B" . "\e$,44u4~\e(B")
    ("\e$,1?3?M\e(B" . "\e$,44w5)\e(B") ("\e$,1?3\e(B" . "\e$,44w\e(B") ("\e$,1?3??\e(B" . "\e$,44x\e(B") ("\e$,1?3?@\e(B" . "\e$,44x4~\e(B"))
  "Kannada characters to glyphs conversion table.
Each element is (CHARS . GLYPHS), where CHARS is a string of one or
more Kannada characters and GLYPHS is the glyph string that replaces
it, or nil.
Default value contains only the basic rules.")
248 | ||
(defvar knd-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `knd-char-glyph', keyed by character string.")
254 | ||
(defvar knd-char-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
  "Regexp matching any key of `knd-char-glyph-hash'.")
257 | ||
(defvar knd-conjunct-glyph
  '(("\e$,1>u\e(B" . "\e$,43Q\e(B")
    ("\e$,1>v\e(B" . "\e$,43U\e(B")
    ("\e$,1>w\e(B" . "\e$,43X\e(B")
    ("\e$,1>x\e(B" . "\e$,43[\e(B")
    ("\e$,1>y\e(B" . "\e$,43]\e(B")
    ("\e$,1>z\e(B" . "\e$,43`\e(B")
    ("\e$,1>{\e(B" . "\e$,43c\e(B")
    ("\e$,1>|\e(B" . "\e$,43g\e(B")
    ("\e$,1>}\e(B" . "\e$,43i\e(B")
    ("\e$,1>~\e(B" . "\e$,43k\e(B")
    ("\e$,1>\7f\e(B" . "\e$,43o\e(B")
    ("\e$,1? \e(B" . "\e$,43r\e(B")
    ("\e$,1?!\e(B" . "\e$,43u\e(B")
    ("\e$,1?"\e(B" . "\e$,43x\e(B")
    ("\e$,1?#\e(B" . "\e$,43|\e(B")
    ("\e$,1?$\e(B" . "\e$,44A\e(B")
    ("\e$,1?%\e(B" . "\e$,44D\e(B")
    ("\e$,1?&\e(B" . "\e$,44G\e(B")
    ("\e$,1?'\e(B" . "\e$,44J\e(B")
    ("\e$,1?(\e(B" . "\e$,44M\e(B")
    ("\e$,1?*\e(B" . "\e$,44P\e(B")
    ("\e$,1?+\e(B" . "\e$,44U\e(B")
    ("\e$,1?,\e(B" . "\e$,44Y\e(B")
    ("\e$,1?-\e(B" . "\e$,44\\e(B")
    ("\e$,1?.\e(B" . "\e$,44]\e(B")
    ("\e$,1?/\e(B" . "\e$,44`\e(B")
    ("\e$,1?0\e(B" . "\e$,44c\e(B")
    ("\e$,1?2\e(B" . "\e$,44g\e(B")
    ("\e$,1?3\e(B" . "\e$,44y\e(B")
    ("\e$,1?5\e(B" . "\e$,44j\e(B")
    ("\e$,1?6\e(B" . "\e$,44m\e(B")
    ("\e$,1?7\e(B" . "\e$,44p\e(B")
    ("\e$,1?8\e(B" . "\e$,44s\e(B")
    ("\e$,1?9\e(B" . "\e$,44v\e(B"))
  "Kannada characters to conjunct glyphs conversion table.")
267 | ||
(defvar knd-conjunct-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-conjunct-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `knd-conjunct-glyph', keyed by consonant string.")
273 | ||
(defvar knd-conjunct-glyph-regexp
  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
  "Regexp matching any value (conjunct glyph) of `knd-conjunct-glyph-hash'.")
276 | ||
;; Give the first glyph of every conjunct glyph string the
;; `reference-point' property (5 . 3).
(dolist (entry knd-conjunct-glyph)
  (put-char-code-property (aref (cdr entry) 0) 'reference-point '(5 . 3)))
281 | ||
282 | ;; glyph-to-glyph conversion table. | |
283 | ;; it is supposed that glyphs are ordered in | |
284 | ;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar]. | |
285 | ||
(defvar knd-glyph-glyph
  '(("\e$,45$4A\e(B" . "\e$,45*\e(B")
    ("\e$,45'4A\e(B" . "\e$,45+\e(B")
    ("\e$,44A3g\e(B" . "\e$,45,\e(B")
    ("\e$,45$3Q\e(B" . "\e$,45-\e(B"))
  "Glyph-to-glyph conversion table.
Each element is (GLYPHS . REPLACEMENT): the glyph sequence GLYPHS is
replaced by the glyph string REPLACEMENT.")
291 | ||
(defvar knd-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `knd-glyph-glyph', keyed by glyph sequence.")
(defvar knd-glyph-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)
  "Regexp matching any key of `knd-glyph-glyph-hash'.")
299 | ||
(defun knd-charseq (from &optional to)
  "Return the list of Kannada glyph characters for codes FROM through TO.
TO defaults to FROM, which yields a one-element list.  Each code is
converted with `indian-glyph-char'."
  (mapcar (lambda (code) (indian-glyph-char code 'kannada))
          (kannada-range from (or to from))))
304 | ||
(defvar knd-glyph-cv
  (apply 'append
         (mapcar
          ;; Each SPAN is (FROM) or (FROM TO), as accepted by `knd-charseq'.
          (lambda (span) (apply 'knd-charseq span))
          '((#x40 #x50) (#x52 #x54) (#x56 #x57) (#x59 #x5a)
            (#x5c)      (#x5e #x5f) (#x61 #x62) (#x64 #x66)
            (#x6a)      (#x6c #x6e) (#x70 #x71) (#x73 #x74)
            (#x76 #x77) (#x79 #x7b) (#x7d #x7e) (#xa2 #xa3)
            (#xa5 #xa6) (#xa8 #xa9) (#xab #xac) (#xae #xaf)
            (#xb1 #xb2)
            ;; This span is listed twice.
            (#xb6 #xb8) (#xb6 #xb8)
            (#xba #xbb) (#xbe #xbf) (#xc1 #xc2) (#xc4 #xc6)
            (#xc8 #xc9) (#xcb #xcc) (#xce #xcf) (#xd1 #xd2)
            (#xd4 #xd5) (#xd7 #xd8) (#xc3))))
  "Kannada Consonants/Vowels/Nukta Glyphs")
342 | ||
(defvar knd-glyph-space
  (apply 'knd-charseq '(#xb3 #xb4))
  "Kannada Spacing Glyphs")
346 | ||
(defvar knd-glyph-right-modifier
  (apply 'append
         (mapcar (lambda (span) (apply 'knd-charseq span))
                 '((#xdb #xdd) (#xdf) (#xe0 #xe3) (#xe9))))
  "Kannada Modifiers attached at the right side.")
354 | ||
(defvar knd-glyph-right-modifier-regexp
  (concat "[" knd-glyph-right-modifier "]")
  "Regexp character class built from the glyphs in `knd-glyph-right-modifier'.")
357 | ||
(defvar knd-glyph-jha-tail
  (apply 'knd-charseq '(#x68))
  "Kannada tail for jha.")
361 | ||
(defvar knd-glyph-top-matra
  (apply 'append
         (mapcar (lambda (span) (apply 'knd-charseq span))
                 '((#xda) (#xdd) (#xe6) (#xe8))))
  "Kannada Matras attached at the top side.")
369 | ||
(defvar knd-glyph-bottom-matra
  (apply 'append
         (mapcar (lambda (span) (apply 'knd-charseq span))
                 '((#xe4 #xe5) (#xe7))))
  "Kannada Matras attached at the bottom.")
375 | ||
(defvar knd-glyph-end-marks
  (apply 'append
         (mapcar (lambda (span) (apply 'knd-charseq span))
                 '((#x25) (#x4d #x4e) (#xde))))
  "Kannada end marks: arkavattu, virama, au and diirghaa.")
382 | ||
(defvar knd-glyph-bottom-modifier
  (apply 'append
         (mapcar
          ;; Each SPAN is (FROM) or (FROM TO), as accepted by `knd-charseq'.
          (lambda (span) (apply 'knd-charseq span))
          '((#x51) (#x55) (#x58) (#x5b) (#x5d) (#x60) (#x63) (#x67)
            (#x69) (#x6b) (#x6f) (#x72) (#x75) (#x78) (#x7c) (#xa1)
            (#xa4) (#xa7) (#xaa) (#xad) (#xb0) (#xb5) (#xb9)
            (#xbc #xbd)
            (#xc0) (#xc3) (#xc7) (#xca) (#xcd) (#xd0) (#xd3) (#xd6)
            (#xd9)
            (#xea #xef))))
  "Kannada Modifiers attached at the bottom.")
420 | ||
(defvar knd-glyph-order
  (list (cons knd-glyph-cv 1)
        (cons knd-glyph-top-matra 2)
        (cons knd-glyph-jha-tail 3)
        (cons knd-glyph-right-modifier 4)
        (cons knd-glyph-space 5)
        (cons knd-glyph-bottom-modifier 5)
        (cons knd-glyph-bottom-matra 6)
        (cons knd-glyph-end-marks 7))
  "Alist of (GLYPH-LIST . ORDER) pairs.
ORDER is the value stored as the `composition-order' property of
every glyph in GLYPH-LIST.")
431 | ||
;; Record the `composition-order' of every glyph listed in
;; `knd-glyph-order'.  Groups are processed in list order, so a glyph
;; that appears in several groups keeps the order of the last one.
(dolist (group knd-glyph-order)
  (let ((order (cdr group)))
    (dolist (glyph (car group))
      (put-char-code-property glyph 'composition-order order))))
439 | ||
(defun kannada-compose-syllable-string (string)
  "Return STRING composed as one Kannada syllable.
STRING is first passed through `decompose-string', inserted into a
temporary buffer and composed there as a whole with
`kannada-compose-syllable-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
445 | ||
446 | ;; kch | |
(defun kannada-compose-syllable-region (from to)
  "Compose the Kannada syllable in the region FROM to TO.
The characters of the region are converted to glyphs, the glyphs are
re-ordered by their `composition-order' property, and the region is
composed with the resulting sequence of glyphs and reference points.
If no character in the region yields a glyph, the region is left
uncomposed."
  ;; `glyph-str' and `rest' start as empty strings (not nil) so that
  ;; the `string-match' calls below are safe even when nothing in the
  ;; region has an entry in the conversion tables.
  (let ((glyph-str "") (cons-num 0) (glyph-str-list nil)
        (last-virama nil) (preceding-r nil)
        (last-char (char-before to)) match-str pos
        glyph-block split-pos (conj nil) (rest "")
        (components nil))
    (save-excursion
      (save-restriction
        ;;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last virama must be preserved.
        (if (eq last-char ?\e$,1?M\e(B)
            (progn
              (setq last-virama t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r virama" must be modifier.
        (when (looking-at "\e$,1?0?M\e(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; remove conjunct consonants
        (while (re-search-forward knd-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          ;; Every consonant after the first is turned into its
          ;; conjunct glyph and dropped from the remaining text,
          ;; together with the first virama collected so far.
          (if (and (string-match kannada-consonant match-str)
                   (> cons-num 0))
              (progn
                (setq conj (concat conj (gethash (match-string 0 match-str)
                                                 knd-conjunct-glyph-hash)))
                (setq match-str (replace-match "" t nil match-str))
                (if (string-match "\e$,1?M\e(B" rest)
                    (setq rest (replace-match "" t nil rest)))))
          (setq rest (concat rest match-str))
          ;; count the number of consonant-glyphs.
          (if (string-match kannada-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; translate the rest characters into glyphs
        (setq pos 0)
        (while (string-match knd-char-glyph-regexp rest pos)
          (setq match-str (match-string 0 rest))
          (setq pos (match-end 0))
          (setq glyph-str
                (concat glyph-str (gethash match-str knd-char-glyph-hash))))

        (if conj (setq glyph-str (concat glyph-str conj)))
        (if last-virama (setq glyph-str (concat glyph-str "\e$,45)\e(B"))
          (goto-char (point-min))
          (if (re-search-forward kannada-consonant-needs-twirl nil t)
              (setq glyph-str (concat glyph-str "\e$,44z\e(B"))))
        ;; preceding-r must be attached
        (if preceding-r
            (setq glyph-str (concat glyph-str "\e$,43%\e(B")))
        ;;; *** glyph-to-glyph conversion ***
        (when (string-match knd-glyph-glyph-regexp glyph-str)
          (setq glyph-str
                (replace-match (gethash (match-string 0 glyph-str)
                                        knd-glyph-glyph-hash)
                               nil t glyph-str)))
        ;;; *** glyph reordering ***
        ;; Split after each virama glyph (or at the last glyph) and
        ;; sort each block by `composition-order'.
        (while (setq split-pos (string-match "\e$,45)\e(B\\|.$" glyph-str))
          (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
          (setq glyph-str (substring glyph-str (1+ split-pos)))
          (setq
           glyph-block
           (sort (string-to-list glyph-block)
                 (function (lambda (x y)
                             (< (get-char-code-property x 'composition-order)
                                (get-char-code-property y 'composition-order))))))
          (setq glyph-str-list (nconc glyph-str-list glyph-block)))
        ;;; *** insert space glyphs for kerning ***
        ;; A spacing glyph goes between two adjacent glyphs whose
        ;; composition order is 5 or 6.
        (if (> cons-num 0)
            (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co)
              (while curr
                (setq co (get-char-code-property
                          (car curr) 'composition-order)
                      bott (or (eq co 5) (eq co 6)))
                (if (and bott last-bott)
                    (setcdr prev (cons ?\e$,44T\e(B curr)))
                (setq last-bott bott prev curr curr (cdr curr)))))
        ;; concatenate and attach reference-points.
        ;; The list alternates REFERENCE-POINT GLYPH ...; the leading
        ;; reference point is dropped with `cdr'.
        (setq components
              (cdr
               (apply
                'nconc
                (mapcar
                 (function (lambda (x)
                             (list
                              (or (get-char-code-property x 'reference-point)
                                  '(5 . 3) ;; default reference point.
                                  )
                              x)))
                 glyph-str-list))))))
    ;; Nothing to compose when no glyph was produced.
    (when components
      (compose-region from to components))))
541 | ||
542 | (provide 'knd-util) | |
543 | ||
e4eaf898 | 544 | ;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc |
75b62c63 | 545 | ;;; knd-util.el ends here |