Commit | Line | Data |
---|---|---|
6b61353c KH |
1 | ;;; knd-util.el --- Support for composing Kannada characters |
2 | ||
3 | ;; Copyright (C) 2003 Free Software Foundation, Inc. | |
4 | ||
5 | ;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org> | |
6 | ;; Keywords: multilingual, Kannada | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
24 | ||
25 | ;; Created: Jul. 14. 2003 | |
26 | ||
27 | ;;; Commentary: | |
28 | ||
29 | ;; This file provides character(Unicode) to glyph(CDAC) conversion and | |
30 | ;; composition of Kannada script characters. | |
31 | ||
32 | ;;; Code: | |
33 | ||
34 | ;;;###autoload | |
35 | ||
36 | ;; Kannada Composable Pattern | |
37 | ;; C .. Consonants | |
38 | ;; V .. Vowel | |
39 | ;; H .. Virama | |
40 | ;; M .. Matra | |
41 | ;; A .. Vowel Modifier (anusvara, visarga) | |
42 | ;; (N .. Zerowidth Non Joiner) | |
43 | ;; (J .. Zerowidth Joiner. ) | |
44 | ;; 1. vowel | |
45 | ;; V(A)? | |
46 | ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) | |
47 | ;; ((CH)?(CH)?(CH)?CH)?C(H|M?)? | |
48 | ||
(defconst kannada-consonant
  "[\e$,1>u\e(B-\e$,1?9\e(B]"
  "Regexp character class matching a single Kannada consonant.
Used by `kannada-compose-syllable-region' to detect and count consonants.")
51 | ||
(defconst kannada-consonant-needs-twirl
  "[\e$,1>u>w\e(B-\e$,1>{>}\e(B-\e$,1>~? \e(B-\e$,1?"?$\e(B-\e$,1?+?-?0?3\e(B-\e$,1?9\e(B]\\(\e$,1?M\e(B[\e$,1>u\e(B-\e$,1?9\e(B]\\)*[\e$,1?A?B?C?D>b\e(B]?$"
  "Regexp matching a syllable ending that needs an extra glyph appended.
The match is anchored at the end of the text: one consonant from the
listed set, any number of virama+consonant pairs, and optionally one
trailing sign.  `kannada-compose-syllable-region' appends one more
glyph to the glyph string when this matches.")
54 | ||
(defconst kannada-composable-pattern
  (concat
   ;; Alternative 1: a single character from the vowel/sign class, or
   ;; the lone sign in the second bracket.
   "\\([\e$,1>b\e(B-\e$,1>t?`>l\e(B]\\)\\|[\e$,1>c\e(B]"
   ;; Alternative 2: a consonant syllable.
   "\\|\\("
   ;; Optional prefix of up to four consonant+virama pairs; the first
   ;; three are individually optional, the fourth is required once the
   ;; prefix group is present.
   "\\(?:\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?"
   ;; The base consonant, optionally followed by a virama or by one
   ;; character of the trailing sign class.
   "[\e$,1>u\e(B-\e$,1?9\e(B]\\(?:\e$,1?M\e(B\\|[\e$,1?>\e(B-\e$,1?M?U?C\e(B]?\\)?"
   "\\)")
  "Regexp matching a composable sequence of Kannada characters.")
63 | ||
64 | ;;;###autoload | |
(defun kannada-compose-region (from to)
  "Compose every Kannada syllable found between FROM and TO.
Each match of `kannada-composable-pattern' inside the region is handed
to `kannada-compose-syllable-region'.  Interactively, operate on the
region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward kannada-composable-pattern nil t)
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (kannada-compose-syllable-region syllable-start syllable-end))))))
74 | ;;;###autoload | |
(defun kannada-compose-string (string)
  "Return the text of STRING after composing its Kannada syllables.
STRING is passed through `decompose-string', inserted into a temporary
buffer, and composed there with `kannada-compose-region'."
  (with-temp-buffer
    (let ((plain (decompose-string string)))
      (insert plain))
    (kannada-compose-region (point-min) (point-max))
    (buffer-string)))
80 | ||
81 | ;;;###autoload | |
(defun kannada-post-read-conversion (len)
  "Compose the LEN characters following point and return the region length.
The buffer-modified flag is recorded before composing and put back
afterwards."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p)))
        (narrow-to-region (point) (+ (point) len))
        (kannada-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        ;; Size of the narrowed region after composition.
        (- (point-max) (point-min))))))
90 | ||
(defun kannada-range (from to)
  "Make the list of the integers of range FROM to TO.
Both ends are inclusive; the result is nil when FROM is greater than TO."
  (let ((acc nil)
        (n to))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq acc (cons n acc))
      (setq n (1- n)))
    acc))
95 | ||
(defun kannada-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, ordered longest first, and passed to `regexp-opt'."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _val) (setq keys (cons key keys))) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
105 | ||
(defun kannada-regexp-of-hashtbl-vals (hashtbl)
  "Return a regular expression that matches all values in hashtable HASHTBL.
The values are collected, ordered longest first, and passed to `regexp-opt'."
  (let ((max-specpdl-size 1000)
        (vals nil))
    (maphash (lambda (_key val) (setq vals (cons val vals))) hashtbl)
    (regexp-opt
     (sort vals (lambda (a b) (> (length a) (length b)))))))
115 | ||
116 | ;;;###autoload | |
(defun kannada-composition-function (pos &optional string)
  "Compose Kannada characters after the position POS.
If STRING is not nil, it is a string, and POS is an index to the string.
In this case, compose characters after POS of the string.

Composing inside STRING is not yet implemented; nil is returned in that
case.  Otherwise, when the text at POS matches
`kannada-composable-pattern', the match is composed and its end
position is returned; when it does not match, nil is returned."
  (unless string
    (goto-char pos)
    (when (looking-at kannada-composable-pattern)
      (let ((end (match-end 0)))
        (kannada-compose-syllable-region pos end)
        end))))
128 | ||
129 | ;; Notes on conversion steps. | |
130 | ||
131 | ;; 1. chars to glyphs | |
132 | ;; | |
133 | ;; Rules will not be applied to a virama appearing at the end of the | |
134 | ;; text. Also, a preceding/following "r" is treated as a special case. | |
135 | ||
136 | ;; 2. glyphs reordering. | |
137 | ;; | |
138 | ;; The glyphs are split by virama, and each glyph group is | |
139 | ;; re-ordered in the following order. | |
140 | ;; | |
141 | ;; Note that `consonant-glyph' mentioned here does not contain the | |
142 | ;; vertical bar (right modifier) attached at the right of the | |
143 | ;; consonant. | |
144 | ;; | |
145 | ;; If the glyph-group contains right modifier, | |
146 | ;; (1) consonant-glyphs/vowels | |
147 | ;; (2) spacing | |
148 | ;; (3) right modifier (may be matra) | |
149 | ;; (4) top matra | |
150 | ;; (5) preceding "r" | |
151 | ;; (7) following "r" | |
152 | ;; (8) bottom matra or virama. | |
153 | ;; | |
154 | ;; Otherwise, | |
155 | ;; (1) consonant-glyph/vowels, with nukta sign | |
156 | ;; (3) left matra | |
157 | ;; (4) top matra | |
158 | ;; (5) preceding "r" | |
159 | ;; (7) following "r" | |
160 | ;; (8) bottom matra or virama. | |
161 | ;; (2) spacing | |
162 | ||
163 | ;; 3. glyph to glyph | |
164 | ;; | |
165 | ;; For better display, some glyph display would be tuned. | |
166 | ||
167 | ;; 4. Composition. | |
168 | ;; | |
169 | ;; left modifiers will be attached at the left. | |
170 | ;; others will be attached right. | |
171 | ||
172 | ;; Problem:: | |
173 | ;; Can we generalize this method to other Indian scripts? | |
174 | ||
(defvar knd-char-glyph
  ;; Each element is (CHARS . GLYPHS): a Kannada character sequence and
  ;; the glyph string it is rendered with (nil when no glyph is emitted).
  ;; Independent vowels and matras come first, then one row per
  ;; consonant.  Every key in a consonant row must start with that row's
  ;; consonant, because the table is loaded into a hash keyed on CHARS.
  '(("\e$,1>e\e(B" . "\e$,43@\e(B")
    ("\e$,1>f\e(B" . "\e$,43A\e(B")
    ("\e$,1?>\e(B" . "\e$,44{\e(B")
    ("\e$,1>g\e(B" . "\e$,43B\e(B")
    ("\e$,1??\e(B" . nil)
    ("\e$,1>h\e(B" . "\e$,43C\e(B")
    ("\e$,1?@\e(B" . nil)
    ("\e$,1>i\e(B" . "\e$,43D\e(B")
    ("\e$,1?A\e(B" . "\\e$,44\7f\e(B")
    ("\e$,1>j\e(B" . "\e$,43E\e(B")
    ("\e$,1?B\e(B" . "\\e$,45 \e(B")
    ("\e$,1>k\e(B" . "\e$,43F4\7f\e(B")
    ("\e$,1?C\e(B" . "\\e$,45$\e(B")
    ("\e$,1?`\e(B" . "\e$,43F5 \e(B")
    ("\e$,1?D\e(B" . "\\e$,45%\e(B")
    ;;("\e$,1>l\e(B" . nil) ; not implemented.
    ;;("\e$,1?a\e(B" . nil)
    ("\e$,1>n\e(B" . "\e$,43G\e(B")
    ("\e$,1>o\e(B" . "\e$,43H\e(B")
    ("\e$,1>p\e(B" . "\e$,43I\e(B")
    ("\e$,1?F\e(B" . "\\e$,45&\e(B")
    ("\e$,1?G\e(B" . "\\e$,45&4~\e(B")
    ("\e$,1?H\e(B" . "\\e$,45&5'\e(B")
    ("\e$,1>r\e(B" . "\e$,43J\e(B")
    ("\e$,1?J\e(B" . "\e$,45&5 \e(B")
    ("\e$,1>s\e(B" . "\e$,43K\e(B")
    ("\e$,1?K\e(B" . "\\e$,45&5 4~\e(B")
    ("\e$,1>t\e(B" . "\e$,43L\e(B")
    ("\e$,1?L\e(B" . "\\e$,45(\e(B")
    ("\e$,1>b\e(B" . "\e$,43M\e(B")
    ("\e$,1>c\e(B" . "\e$,43N\e(B")
    ("\e$,1>u?M\e(B" . "\e$,43O5)\e(B") ("\e$,1>u\e(B" . "\e$,43O\e(B") ("\e$,1>u??\e(B" . "\e$,43P\e(B") ("\e$,1>u?@\e(B" . "\e$,43P4~\e(B")
    ("\e$,1>v?M\e(B" . "\e$,43S5)\e(B") ("\e$,1>v\e(B" . "\e$,43S\e(B") ("\e$,1>v??\e(B" . "\e$,43T\e(B") ("\e$,1>v?@\e(B" . "\e$,43T4~\e(B") ("\e$,1>v?F\e(B" . "\e$,43S5&\e(B") ("\e$,1>v?G\e(B" . "\e$,43S5&4~\e(B") ("\e$,1>v?H\e(B" . "\e$,43S5&5'\e(B") ("\e$,1>v?J\e(B" . "\e$,43S5&5&5 \e(B") ("\e$,1>v?K\e(B" . "\e$,43S5&5&5 4~\e(B") ("\e$,1>v?L\e(B" . "\e$,43S5(\e(B")
    ("\e$,1>w?M\e(B" . "\e$,43V5)\e(B") ("\e$,1>w\e(B" . "\e$,43V\e(B") ("\e$,1>w??\e(B" . "\e$,43W\e(B") ("\e$,1>w?@\e(B" . "\e$,43W4~\e(B")
    ("\e$,1>x?M\e(B" . "\e$,43Y5)\e(B") ("\e$,1>x\e(B" . "\e$,43Y\e(B") ("\e$,1>x??\e(B" . "\e$,43Z\e(B") ("\e$,1>x?@\e(B" . "\e$,43Z4~\e(B")
    ("\e$,1>y?M\e(B" . "\e$,43\5)\e(B") ("\e$,1>y\e(B" . "\e$,43\\e(B")
    ("\e$,1>z?M\e(B" . "\e$,43^5)\e(B") ("\e$,1>z\e(B" . "\e$,43^\e(B") ("\e$,1>z??\e(B" . "\e$,43_\e(B") ("\e$,1>z?@\e(B" . "\e$,43_4~\e(B")
    ("\e$,1>{?M\e(B" . "\e$,43a5)\e(B") ("\e$,1>{\e(B" . "\e$,43a\e(B") ("\e$,1>{??\e(B" . "\e$,43b\e(B") ("\e$,1>{?@\e(B" . "\e$,43b4~\e(B")
    ("\e$,1>|?M\e(B" . "\e$,43d5)\e(B") ("\e$,1>|\e(B" . "\e$,43d\e(B") ("\e$,1>|??\e(B" . "\e$,43f\e(B") ("\e$,1>|?@\e(B" . "\e$,43f4~\e(B") ("\e$,1>|?F\e(B" . "\e$,43e5&\e(B") ("\e$,1>|?G\e(B" . "\e$,43e5&4~\e(B") ("\e$,1>|?H\e(B" . "\e$,43e5&5'\e(B") ("\e$,1>|?J\e(B" . "\e$,43e5&5&5 \e(B") ("\e$,1>|?K\e(B" . "\e$,43e5&5&5 4~\e(B") ("\e$,1>|?L\e(B" . "\e$,43e5(\e(B")
    ("\e$,1>}?M\e(B" . "\e$,44a4z3h4\7f5)\e(B") ("\e$,1>}\e(B" . "\e$,44a4z3h4\7f\e(B") ("\e$,1>}??\e(B" . "\e$,44b3h4\7f\e(B") ("\e$,1>}?@\e(B" . "\e$,44b3h4\7f4~\e(B") ("\e$,1>}?B\e(B". "\e$,44a4z3h5 \e(B") ("\e$,1>}?J\e(B". "\e$,44a5&3h5 \e(B") ("\e$,1>}?K\e(B". "\e$,44a5&3h5 4~\e(B")
    ("\e$,1>~?M\e(B" . "\e$,43j5)\e(B") ("\e$,1>~\e(B" . "\e$,43j\e(B")
    ;; The third entry of this row used to be keyed with the consonant
    ;; of a different row, which duplicated that row's key and left this
    ;; consonant + AA matra without an entry.
    ("\e$,1>\7f?M\e(B" . "\e$,43m5)\e(B") ("\e$,1>\7f\e(B" . "\e$,43l\e(B") ("\e$,1>\7f?>\e(B" . "\e$,43m4{\e(B") ("\e$,1>\7f??\e(B" . "\e$,43n\e(B") ("\e$,1>\7f?@\e(B" . "\e$,43n4~\e(B") ("\e$,1>\7f?F\e(B" . "\e$,43m5&\e(B") ("\e$,1>\7f?G\e(B" . "\e$,43m5&4~\e(B") ("\e$,1>\7f?H\e(B" . "\e$,43m5&5'\e(B") ("\e$,1>\7f?J\e(B" . "\e$,43m5&5&5 \e(B") ("\e$,1>\7f?K\e(B" . "\e$,43m5&5&5 4~\e(B") ("\e$,1>\7f?L\e(B" . "\e$,43m5(\e(B")
    ("\e$,1? ?M\e(B" . "\e$,43p5)\e(B") ("\e$,1? \e(B" . "\e$,43p\e(B") ("\e$,1? ??\e(B" . "\e$,43q\e(B") ("\e$,1? ?@\e(B" . "\e$,43q4~\e(B")
    ("\e$,1?!?M\e(B" . "\e$,43s5)\e(B") ("\e$,1?!\e(B" . "\e$,43s\e(B") ("\e$,1?!??\e(B" . "\e$,43t\e(B") ("\e$,1?!?@\e(B" . "\e$,43t4~\e(B")
    ("\e$,1?"?M\e(B" . "\e$,43v5)\e(B") ("\e$,1?"\e(B" . "\e$,43v\e(B") ("\e$,1?"??\e(B" . "\e$,43w\e(B") ("\e$,1?"?@\e(B" . "\e$,43w4~\e(B")
    ("\e$,1?#?M\e(B" . "\e$,43z5)\e(B") ("\e$,1?#\e(B" . "\e$,43y\e(B") ("\e$,1?#?>\e(B" . "\e$,43z4{\e(B") ("\e$,1?#??\e(B" . "\e$,43{\e(B") ("\e$,1?#?@\e(B" . "\e$,43{4~\e(B") ("\e$,1?#?F\e(B" . "\e$,43z5&\e(B") ("\e$,1?#?G\e(B" . "\e$,43z5&4~\e(B") ("\e$,1?#?H\e(B" . "\e$,43z5&5'\e(B") ("\e$,1?#?J\e(B" . "\e$,43z5&5&5 \e(B") ("\e$,1?#?K\e(B" . "\e$,43z5&5&5 4~\e(B") ("\e$,1?#?L\e(B" . "\e$,43z5(\e(B")
    ("\e$,1?$?M\e(B" . "\e$,43}5)\e(B") ("\e$,1?$\e(B" . "\e$,43}\e(B") ("\e$,1?$??\e(B" . "\e$,43~\e(B") ("\e$,1?$?@\e(B" . "\e$,43~4~\e(B")
    ("\e$,1?%?M\e(B" . "\e$,44B5)\e(B") ("\e$,1?%\e(B" . "\e$,44B\e(B") ("\e$,1?%??\e(B" . "\e$,44C\e(B") ("\e$,1?%?@\e(B" . "\e$,44C4~\e(B")
    ("\e$,1?&?M\e(B" . "\e$,44E5)\e(B") ("\e$,1?&\e(B" . "\e$,44E\e(B") ("\e$,1?&??\e(B" . "\e$,44F\e(B") ("\e$,1?&?@\e(B" . "\e$,44F4~\e(B")
    ("\e$,1?'?M\e(B" . "\e$,44H5)\e(B") ("\e$,1?'\e(B" . "\e$,44H\e(B") ("\e$,1?'??\e(B" . "\e$,44I\e(B") ("\e$,1?'?@\e(B" . "\e$,44I4~\e(B")
    ("\e$,1?(?M\e(B" . "\e$,44K5)\e(B") ("\e$,1?(\e(B" . "\e$,44K\e(B") ("\e$,1?(??\e(B" . "\e$,44L\e(B") ("\e$,1?(?@\e(B" . "\e$,44L4~\e(B")
    ("\e$,1?*?M\e(B" . "\e$,44N5)\e(B") ("\e$,1?*\e(B" . "\e$,44N\e(B") ("\e$,1?*??\e(B" . "\e$,44O\e(B") ("\e$,1?*?@\e(B" . "\e$,44O4~\e(B") ("\e$,1?*?A\e(B" . "\e$,44N5"\e(B") ("\e$,1?*?B\e(B" . "\e$,44N5#\e(B") ("\e$,1?*?J\e(B" . "\e$,44N5&5#\e(B") ("\e$,1?*?K\e(B" . "\e$,44N5&5#4~\e(B")
    ("\e$,1?+?M\e(B" . "\e$,44Q5)\e(B") ("\e$,1?+\e(B" . "\e$,44Q\e(B") ("\e$,1?+??\e(B" . "\e$,44R\e(B") ("\e$,1?+?@\e(B" . "\e$,44R4~\e(B") ("\e$,1?+?A\e(B" . "\e$,44Q5"\e(B") ("\e$,1?+?B\e(B" . "\e$,44Q5#\e(B") ("\e$,1?+?J\e(B" . "\e$,44Q5&5#\e(B") ("\e$,1?+?K\e(B" . "\e$,44Q5&5#4~\e(B")
    ("\e$,1?,?M\e(B" . "\e$,44W5)\e(B") ("\e$,1?,\e(B" . "\e$,44V\e(B") ("\e$,1?,?>\e(B". "\e$,44W4{\e(B") ("\e$,1?,??\e(B" . "\e$,44X\e(B") ("\e$,1?,?@\e(B" . "\e$,44X4~\e(B") ("\e$,1?,?F\e(B" . "\e$,44W5&\e(B") ("\e$,1?,?G\e(B" . "\e$,44W5&4~\e(B") ("\e$,1?,?H\e(B" . "\e$,44W5&5'\e(B") ("\e$,1?,?J\e(B" . "\e$,44W5&5&5 \e(B") ("\e$,1?,?K\e(B" . "\e$,44W5&5&5 4~\e(B") ("\e$,1?,?L\e(B" . "\e$,44W5(\e(B")
    ("\e$,1?-?M\e(B" . "\e$,44Z5)\e(B") ("\e$,1?-\e(B" . "\e$,44Z\e(B") ("\e$,1?-??\e(B" . "\e$,44[\e(B") ("\e$,1?-?@\e(B" . "\e$,44[4~\e(B")
    ("\e$,1?.?M\e(B" . "\e$,44h5!5)\e(B") ("\e$,1?.\e(B" . "\e$,44h4z4\7f\e(B") ("\e$,1?.?>\e(B" . "\e$,44h4z5!4{\e(B") ("\e$,1?.??\e(B" . "\e$,44i4\7f\e(B") ("\e$,1?.?@\e(B" . "\e$,44i4\7f4~\e(B") ("\e$,1?.?J\e(B". "\e$,44h5&5 \e(B") ("\e$,1?.?K\e(B". "\e$,44h5&5 4~\e(B")
    ("\e$,1?/?M\e(B" . "\e$,44^4z5!5)\e(B") ("\e$,1?/\e(B" . "\e$,44^4z4\7f\e(B") ("\e$,1?/?>\e(B" . "\e$,44^4z5!4{\e(B")("\e$,1?/??\e(B" . "\e$,44_4\7f\e(B") ("\e$,1?/?@\e(B" . "\e$,44_4\7f4~\e(B") ("\e$,1?/?J\e(B" . "\e$,44^5&5 \e(B") ("\e$,1?/?K\e(B" . "\e$,44^5&5 4~\e(B")
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ;; NOTE(review): the next row repeats the previous one verbatim, so
    ;; it adds nothing to the hash table.  It may have been intended for
    ;; a different consonant -- confirm against the font's glyph chart.
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ("\e$,1?2?M\e(B" . "\e$,44e5)\e(B") ("\e$,1?2\e(B" . "\e$,44d\e(B") ("\e$,1?2?>\e(B" . "\e$,44e4{\e(B") ("\e$,1?2??\e(B" . "\e$,44f\e(B") ("\e$,1?2?@\e(B" . "\e$,44f4~\e(B") ("\e$,1?2?F\e(B" . "\e$,44e5&\e(B") ("\e$,1?2?G\e(B" . "\e$,44e5&4~\e(B") ("\e$,1?2?H\e(B" . "\e$,44e5&5'\e(B") ("\e$,1?2?J\e(B" . "\e$,44e5&5&5 \e(B") ("\e$,1?2?K\e(B" . "\e$,44e5&5&5 4~\e(B") ("\e$,1?2?L\e(B" . "\e$,44e5(\e(B")
    ("\e$,1?5?M\e(B" . "\e$,44h5)\e(B") ("\e$,1?5\e(B" . "\e$,44h\e(B") ("\e$,1?5??\e(B" . "\e$,44i\e(B") ("\e$,1?5?@\e(B" . "\e$,44i4~\e(B") ("\e$,1?5?A\e(B" . "\e$,44h5"\e(B") ("\e$,1?5?B\e(B" . "\e$,44h5#\e(B") ("\e$,1?5?J\e(B" . "\e$,44h5&5#\e(B") ("\e$,1?5?K\e(B" . "\e$,44h5&5#4~\e(B")
    ("\e$,1?6?M\e(B" . "\e$,44k5)\e(B") ("\e$,1?6\e(B" . "\e$,44k\e(B") ("\e$,1?6??\e(B" . "\e$,44l\e(B") ("\e$,1?6?@\e(B" . "\e$,44l4~\e(B")
    ("\e$,1?7?M\e(B" . "\e$,44n5)\e(B") ("\e$,1?7\e(B" . "\e$,44n\e(B") ("\e$,1?7??\e(B" . "\e$,44o\e(B") ("\e$,1?7?@\e(B" . "\e$,44o4~\e(B")
    ("\e$,1?8?M\e(B" . "\e$,44q5)\e(B") ("\e$,1?8\e(B" . "\e$,44q\e(B") ("\e$,1?8??\e(B" . "\e$,44r\e(B") ("\e$,1?8?@\e(B" . "\e$,44r4~\e(B")
    ("\e$,1?9?M\e(B" . "\e$,44t5)\e(B") ("\e$,1?9\e(B" . "\e$,44t\e(B") ("\e$,1?9??\e(B" . "\e$,44u\e(B") ("\e$,1?9?@\e(B" . "\e$,44u4~\e(B")
    ("\e$,1?3?M\e(B" . "\e$,44w5)\e(B") ("\e$,1?3\e(B" . "\e$,44w\e(B") ("\e$,1?3??\e(B" . "\e$,44x\e(B") ("\e$,1?3?@\e(B" . "\e$,44x4~\e(B"))
  "Kannada characters to glyphs conversion table.
Default value contains only the basic rules.")
244 | ||
(defvar knd-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Entries are stored in list order, so a later duplicate key
    ;; replaces an earlier one.
    (dolist (entry knd-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table form of `knd-char-glyph', keyed on the character string.")
250 | ||
(defvar knd-char-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
  "Regexp matching any key of `knd-char-glyph-hash'.")
253 | ||
(defvar knd-conjunct-glyph
  ;; Each element is (CONSONANT . GLYPH).  The glyph is looked up by
  ;; `kannada-compose-syllable-region' for every consonant after the
  ;; first one in a syllable.
  '(("\e$,1>u\e(B" . "\e$,43Q\e(B") ("\e$,1>v\e(B" . "\e$,43U\e(B") ("\e$,1>w\e(B" . "\e$,43X\e(B") ("\e$,1>x\e(B" . "\e$,43[\e(B") ("\e$,1>y\e(B" . "\e$,43]\e(B")
    ("\e$,1>z\e(B" . "\e$,43`\e(B") ("\e$,1>{\e(B" . "\e$,43c\e(B") ("\e$,1>|\e(B" . "\e$,43g\e(B") ("\e$,1>}\e(B" . "\e$,43i\e(B") ("\e$,1>~\e(B" . "\e$,43k\e(B")
    ("\e$,1>\7f\e(B" . "\e$,43o\e(B") ("\e$,1? \e(B" . "\e$,43r\e(B") ("\e$,1?!\e(B" . "\e$,43u\e(B") ("\e$,1?"\e(B" . "\e$,43x\e(B") ("\e$,1?#\e(B" . "\e$,43|\e(B")
    ("\e$,1?$\e(B" . "\e$,44A\e(B") ("\e$,1?%\e(B" . "\e$,44D\e(B") ("\e$,1?&\e(B" . "\e$,44G\e(B") ("\e$,1?'\e(B" . "\e$,44J\e(B") ("\e$,1?(\e(B" . "\e$,44M\e(B")
    ("\e$,1?*\e(B" . "\e$,44P\e(B") ("\e$,1?+\e(B" . "\e$,44U\e(B") ("\e$,1?,\e(B" . "\e$,44Y\e(B") ("\e$,1?-\e(B" . "\e$,44\\e(B") ("\e$,1?.\e(B" . "\e$,44]\e(B")
    ("\e$,1?/\e(B" . "\e$,44`\e(B") ("\e$,1?0\e(B" . "\e$,44c\e(B") ("\e$,1?2\e(B" . "\e$,44g\e(B") ("\e$,1?3\e(B" . "\e$,44y\e(B") ("\e$,1?5\e(B" . "\e$,44j\e(B")
    ("\e$,1?6\e(B" . "\e$,44m\e(B") ("\e$,1?7\e(B" . "\e$,44p\e(B") ("\e$,1?8\e(B" . "\e$,44s\e(B") ("\e$,1?9\e(B" . "\e$,44v\e(B"))
  "Kannada characters to conjunct glyphs conversion table.")
263 | ||
(defvar knd-conjunct-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-conjunct-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table form of `knd-conjunct-glyph', keyed on the consonant string.")
269 | ||
(defvar knd-conjunct-glyph-regexp
  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
  "Regexp matching any value (conjunct glyph) of `knd-conjunct-glyph-hash'.")
272 | ||
;; Give the first character of every conjunct glyph string the
;; reference point (5 . 3).
(dolist (entry knd-conjunct-glyph)
  (let ((glyph (aref (cdr entry) 0)))
    (put-char-code-property glyph 'reference-point '(5 . 3))))
277 | ||
278 | ;; glyph-to-glyph conversion table. | |
279 | ;; it is supposed that glyphs are ordered in | |
280 | ;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar]. | |
281 | ||
(defvar knd-glyph-glyph
  '(("\e$,45$4A\e(B" . "\e$,45*\e(B")
    ("\e$,45'4A\e(B" . "\e$,45+\e(B")
    ("\e$,44A3g\e(B" . "\e$,45,\e(B")
    ("\e$,45$3Q\e(B" . "\e$,45-\e(B"))
  "Glyph-to-glyph conversion table.
Each element is (GLYPHS . REPLACEMENT): a glyph sequence and the glyph
string substituted for it by `kannada-compose-syllable-region'.")
287 | ||
(defvar knd-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table form of `knd-glyph-glyph', keyed on the source glyph string.")
(defvar knd-glyph-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)
  "Regexp matching any key of `knd-glyph-glyph-hash'.")
295 | ||
(defun knd-charseq (from &optional to)
  "Return the list of characters for `kannada-cdac' codes FROM through TO.
When TO is omitted or nil, the list holds only the character for FROM."
  (let ((first-char (decode-char 'kannada-cdac from))
        (last-char (decode-char 'kannada-cdac (or to from))))
    (number-sequence first-char last-char)))
300 | ||
(defvar knd-glyph-cv
  ;; Each range is listed exactly once; the #xb6-#xb8 range was
  ;; previously repeated, which only put duplicate characters in the list.
  (append
   (knd-charseq #x40 #x50)
   (knd-charseq #x52 #x54)
   (knd-charseq #x56 #x57)
   (knd-charseq #x59 #x5a)
   (knd-charseq #x5c)
   (knd-charseq #x5e #x5f)
   (knd-charseq #x61 #x62)
   (knd-charseq #x64 #x66)
   (knd-charseq #x6a)
   (knd-charseq #x6c #x6e)
   (knd-charseq #x70 #x71)
   (knd-charseq #x73 #x74)
   (knd-charseq #x76 #x77)
   (knd-charseq #x79 #x7b)
   (knd-charseq #x7d #x7e)
   (knd-charseq #xa2 #xa3)
   (knd-charseq #xa5 #xa6)
   (knd-charseq #xa8 #xa9)
   (knd-charseq #xab #xac)
   (knd-charseq #xae #xaf)
   (knd-charseq #xb1 #xb2)
   (knd-charseq #xb6 #xb8)
   (knd-charseq #xba #xbb)
   (knd-charseq #xbe #xbf)
   (knd-charseq #xc1 #xc2)
   (knd-charseq #xc4 #xc6)
   (knd-charseq #xc8 #xc9)
   (knd-charseq #xcb #xcc)
   (knd-charseq #xce #xcf)
   (knd-charseq #xd1 #xd2)
   (knd-charseq #xd4 #xd5)
   (knd-charseq #xd7 #xd8)
   (knd-charseq #xc3))
  "Kannada Consonants/Vowels/Nukta Glyphs")
338 | ||
(defvar knd-glyph-space
  ;; Glyph codes #xb3 and #xb4.
  (knd-charseq #xb3 #xb4)
  "Kannada Spacing Glyphs")
342 | ||
(defvar knd-glyph-right-modifier
  ;; Each sub-list is an argument list for `knd-charseq'.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#xdb #xdd)
                   (#xdf)
                   (#xe0 #xe3)
                   (#xe9))))
  "Kannada Modifiers attached at the right side.")
350 | ||
(defvar knd-glyph-right-modifier-regexp
  (let ((members (apply 'string knd-glyph-right-modifier)))
    (concat "[" members "]"))
  "Regexp character class matching any glyph in `knd-glyph-right-modifier'.")
353 | ||
(defvar knd-glyph-jha-tail
  ;; A one-element list: glyph code #x68.
  (knd-charseq #x68)
  "Kannada tail for jha.")
357 | ||
(defvar knd-glyph-top-matra
  ;; Each sub-list is an argument list for `knd-charseq'.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#xda)
                   (#xdd)
                   (#xe6)
                   (#xe8))))
  "Kannada Matras attached at the top side.")
365 | ||
(defvar knd-glyph-bottom-matra
  ;; Each sub-list is an argument list for `knd-charseq'.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#xe4 #xe5)
                   (#xe7))))
  "Kannada Matras attached at the bottom.")
371 | ||
(defvar knd-glyph-end-marks
  ;; Each sub-list is an argument list for `knd-charseq'.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#x25)
                   (#x4d #x4e)
                   (#xde))))
  "Kannada end marks: arkavattu, virama, au and diirghaa.")
378 | ||
(defvar knd-glyph-bottom-modifier
  ;; Each sub-list is an argument list for `knd-charseq'.
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#x51) (#x55) (#x58) (#x5b) (#x5d) (#x60)
                   (#x63) (#x67) (#x69) (#x6b) (#x6f) (#x72)
                   (#x75) (#x78) (#x7c) (#xa1) (#xa4) (#xa7)
                   (#xaa) (#xad) (#xb0) (#xb5) (#xb9)
                   (#xbc #xbd)
                   (#xc0) (#xc3) (#xc7) (#xca) (#xcd) (#xd0)
                   (#xd3) (#xd6) (#xd9)
                   (#xea #xef))))
  "Kannada Modifiers attached at the bottom.")
416 | ||
(defvar knd-glyph-order
  (list (cons knd-glyph-cv 1)
        (cons knd-glyph-top-matra 2)
        (cons knd-glyph-jha-tail 3)
        (cons knd-glyph-right-modifier 4)
        (cons knd-glyph-space 5)
        (cons knd-glyph-bottom-modifier 5)
        (cons knd-glyph-bottom-matra 6)
        (cons knd-glyph-end-marks 7))
  "Alist of (GLYPHS . ORDER) pairing each glyph class with a sort rank.
GLYPHS is a list of glyph characters and ORDER is the integer stored as
their `composition-order' property.  Spacing glyphs and bottom
modifiers share rank 5.")
427 | ||
;; Record each glyph's rank as its `composition-order' property.  The
;; classes are applied in list order, so a glyph listed in more than one
;; class ends up with the rank of the last class that contains it.
(dolist (class knd-glyph-order)
  (let ((rank (cdr class)))
    (dolist (glyph (car class))
      (put-char-code-property glyph 'composition-order rank))))
435 | ||
(defun kannada-compose-syllable-string (string)
  "Return the text of STRING composed as a single Kannada syllable.
STRING is passed through `decompose-string', inserted into a temporary
buffer, and composed there with `kannada-compose-syllable-region'."
  (with-temp-buffer
    (let ((plain (decompose-string string)))
      (insert plain))
    (kannada-compose-syllable-region (point-min) (point-max))
    (buffer-string)))
441 | ||
442 | ;; kch | |
(defun kannada-compose-syllable-region (from to)
  "Compose kannada syllable in region FROM to TO.

The region is processed in these steps:
 1. Characters are converted to glyphs through `knd-char-glyph-hash'.
    A virama at the very end of the region is set aside and re-added as
    a glyph afterwards, and a leading \"r virama\" is turned into a
    trailing modifier glyph.  Every consonant after the first is
    replaced by its conjunct glyph from `knd-conjunct-glyph-hash'.
 2. The first glyph sequence found in `knd-glyph-glyph-hash' is
    replaced by its substitute.
 3. The glyphs are split after each virama glyph and every group is
    sorted by the `composition-order' property.
 4. A spacing glyph is inserted between two adjacent glyphs whose order
    is 5 or 6.
 5. The glyphs are interleaved with their `reference-point' properties
    (default (5 . 3)) and passed to `compose-region'.

If no character in the region yields a glyph, the region is left
uncomposed."
  ;; `rest' and `glyph-str' start as empty strings rather than nil so
  ;; that a region containing no character of the glyph table does not
  ;; reach `string-match' with a nil string.
  (let ((glyph-str "") (cons-num 0) (glyph-str-list nil)
        (last-virama nil) (preceding-r nil)
        (last-char (char-before to)) match-str pos
        glyph-block split-pos (conj nil) (rest ""))
    (save-excursion
      (save-restriction
        ;;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last virama must be preserved.
        (if (eq last-char ?\e$,1?M\e(B)
            (progn
              (setq last-virama t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r virama" must be modifier.
        (when (looking-at "\e$,1?0?M\e(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; remove conjunct consonants
        (while (re-search-forward knd-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          (if (and (string-match kannada-consonant match-str)
                   (> cons-num 0))
              (progn
                ;; A non-initial consonant: collect its conjunct glyph,
                ;; drop the consonant from the matched text, and drop
                ;; the virama that joined it to what came before.
                (setq conj (concat conj (gethash (match-string 0 match-str)
                                                 knd-conjunct-glyph-hash)))
                (setq match-str (replace-match "" t nil match-str))
                (if (string-match "\e$,1?M\e(B" rest)
                    (setq rest (replace-match "" t nil rest)))))
          (setq rest (concat rest match-str))
          ;; count the number of consonant-glyphs.
          (if (string-match kannada-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; translate the rest characters into glyphs
        (setq pos 0)
        (while (string-match knd-char-glyph-regexp rest pos)
          (setq match-str (match-string 0 rest))
          (setq pos (match-end 0))
          (setq glyph-str
                (concat glyph-str (gethash match-str knd-char-glyph-hash))))

        (if conj (setq glyph-str (concat glyph-str conj)))
        (if last-virama (setq glyph-str (concat glyph-str "\e$,45)\e(B"))
          (goto-char (point-min))
          (if (re-search-forward kannada-consonant-needs-twirl nil t)
              (setq glyph-str (concat glyph-str "\e$,44z\e(B"))))
        ;; preceding-r must be attached
        (if preceding-r
            (setq glyph-str (concat glyph-str "\e$,43%\e(B")))
        ;;; *** glyph-to-glyph conversion ***
        (when (string-match knd-glyph-glyph-regexp glyph-str)
          (setq glyph-str
                (replace-match (gethash (match-string 0 glyph-str)
                                        knd-glyph-glyph-hash)
                               nil t glyph-str)))
        ;;; *** glyph reordering ***
        ;; Each pass cuts off the text up to and including the next
        ;; virama glyph (or the final glyph) and sorts that group.
        (while (setq split-pos (string-match "\e$,45)\e(B\\|.$" glyph-str))
          (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
          (setq glyph-str (substring glyph-str (1+ split-pos)))
          (setq
           glyph-block
           (sort (string-to-list glyph-block)
                 (function (lambda (x y)
                   (< (get-char-code-property x 'composition-order)
                      (get-char-code-property y 'composition-order))))))
          (setq glyph-str-list (nconc glyph-str-list glyph-block)))
        ;;; *** insert space glyphs for kerning ***
        (if (> cons-num 0)
            (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co)
              (while curr
                (setq co (get-char-code-property
                          (car curr) 'composition-order)
                      bott (or (eq co 5) (eq co 6)))
                (if (and bott last-bott)
                    (setcdr prev (cons ?\e$,44T\e(B curr)))
                (setq last-bott bott prev curr curr (cdr curr)))))
        ;; concatenate and attach reference-points.
        ;; The list becomes (REF GLYPH REF GLYPH ...); the `cdr' drops
        ;; the reference point in front of the first glyph.
        (setq glyph-str
              (cdr
               (apply
                'nconc
                (mapcar
                 (function (lambda (x)
                   (list
                    (or (get-char-code-property x 'reference-point)
                        '(5 . 3) ;; default reference point.
                        )
                    x)))
                 glyph-str-list))))))
    ;; With no glyphs there is nothing to compose; leave the text as is.
    (when glyph-str
      (compose-region from to glyph-str))))
537 | ||
538 | (provide 'knd-util) | |
539 | ||
540 | ;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc | |
541 | ;;; knd-util.el ends here |