Add 2008 to copyright years.
[bpt/emacs.git] / lisp / language / knd-util.el
CommitLineData
75b62c63
KH
1;;; knd-util.el --- Support for composing Kannada characters
2
38141d20
GM
3;; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
4;; Free Software Foundation, Inc.
75b62c63
KH
5
6;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org>
7;; Keywords: multilingual, Kannada
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
b4aa6026 13;; the Free Software Foundation; either version 3, or (at your option)
75b62c63
KH
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
23;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24;; Boston, MA 02110-1301, USA.
75b62c63
KH
25
26;; Created: Jul. 14. 2003
27
28;;; Commentary:
29
30;; This file provides character(Unicode) to glyph(CDAC) conversion and
31;; composition of Kannada script characters.
32
33;;; Code:
34
35;;;###autoload
36
37;; Kannada Composable Pattern
38;; C .. Consonants
39;; V .. Vowel
40;; H .. Virama
41;; M .. Matra
42;; V .. Vowel
43;; (N .. Zerowidth Non Joiner)
44;; (J .. Zerowidth Joiner. )
45;; 1. vowel
46;; V(A)?
47;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
48;; ((CH)?(CH)?(CH)?CH)?C(H|M?)?
49
(defconst kannada-consonant
  "[\e$,1>u\e(B-\e$,1?9\e(B]"
  "Regexp matching a single character in the Kannada consonant range.
This is the \"C\" element of the composable-pattern notes in this file.")
52
(defconst kannada-consonant-needs-twirl
  "[\e$,1>u>w\e(B-\e$,1>{>}\e(B-\e$,1>~? \e(B-\e$,1?"?$\e(B-\e$,1?+?-?0?3\e(B-\e$,1?9\e(B]\\(\e$,1?M\e(B[\e$,1>u\e(B-\e$,1?9\e(B]\\)*[\e$,1?A?B?C?D>b\e(B]?$"
  "Regexp deciding whether a syllable gets one extra glyph appended.
`kannada-compose-syllable-region' searches for this pattern when the
syllable does not end in virama and, on a match, appends a single
additional glyph to the glyph string.
NOTE(review): the \"twirl\" is presumably a stroke drawn on top of the
listed consonants -- confirm against the glyph font.")
55
(defconst kannada-composable-pattern
  (concat
   ;; First alternatives: one character from a bracketed class, taken
   ;; on its own (presumably rule 1, "vowel", of the pattern notes in
   ;; this file -- confirm against the code chart).
   "\\([\e$,1>b\e(B-\e$,1>t?`>l\e(B]\\)\\|[\e$,1>c\e(B]"
   "\\|\\("
   ;; Optional prefix: up to three optional consonant+virama pairs
   ;; followed by one mandatory pair, i.e. ((CH)?(CH)?(CH)?CH)?
   "\\(?:\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?\\(?:[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?[\e$,1>u\e(B-\e$,1?9\e(B]\e$,1?M\e(B\\)?"
   ;; Final consonant, optionally followed by either a virama or a
   ;; single character from the trailing class, i.e. C(H|M?)?
   "[\e$,1>u\e(B-\e$,1?9\e(B]\\(?:\e$,1?M\e(B\\|[\e$,1?>\e(B-\e$,1?M?U?C\e(B]?\\)?"
   "\\)")
  "Regexp matching a composable sequence of Kannada characters.")
64
d63b2da8 65;;;###autoload
75b62c63
KH
(defun kannada-compose-region (from to)
  "Compose every Kannada syllable found between FROM and TO.
The region is scanned for successive matches of
`kannada-composable-pattern'; each match is passed to
`kannada-compose-syllable-region'.  Point and the buffer restriction
are restored afterwards.  Interactively, operate on the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (syllable-beg syllable-end)
        (while (re-search-forward kannada-composable-pattern nil t)
          (setq syllable-beg (match-beginning 0)
                syllable-end (match-end 0))
          (kannada-compose-syllable-region syllable-beg syllable-end))))))
d63b2da8 75;;;###autoload
75b62c63
KH
(defun kannada-compose-string (string)
  "Return STRING with its Kannada syllables composed.
STRING is first passed through `decompose-string', then inserted into
a temporary buffer where `kannada-compose-region' is run over the
whole text; the resulting buffer contents are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (kannada-compose-region (point-min) (point-max))
      (buffer-string))))
81
d63b2da8 82;;;###autoload
75b62c63
KH
(defun kannada-post-read-conversion (len)
  "Compose the LEN characters following point and return the new length.
The text is composed with `kannada-compose-region'.  The buffer's
modified flag is put back to the value it had on entry, and point and
the restriction are restored.  The return value is the size of the
narrowed region after composition."
  (save-excursion
    (save-restriction
      (let* ((text-beg (point))
             (text-end (+ text-beg len))
             (was-modified (buffer-modified-p)))
        (narrow-to-region text-beg text-end)
        (kannada-compose-region (point-min) (point-max))
        ;; Composing must not mark the buffer as edited.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
91
(defun kannada-range (from to)
  "Return the ascending list of integers FROM, FROM+1, ..., TO.
The result is nil when FROM is greater than TO."
  (let ((numbers nil)
        (n to))
    ;; Walk downwards from TO so that consing onto the front yields
    ;; an ascending list.
    (while (>= n from)
      (push n numbers)
      (setq n (1- n)))
    numbers))
96
(defun kannada-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, ordered longest first, and handed to
`regexp-opt'."
  ;; `max-specpdl-size' is bound to 1000 for the whole computation.
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _val) (push key keys)) hashtbl)
    (setq keys (sort keys (lambda (a b) (> (length a) (length b)))))
    (regexp-opt keys)))
106
(defun kannada-regexp-of-hashtbl-vals (hashtbl)
  "Return a regular expression that matches all values in hashtable HASHTBL.
The values are collected, ordered longest first, and handed to
`regexp-opt'."
  ;; `max-specpdl-size' is bound to 1000 for the whole computation.
  (let ((max-specpdl-size 1000)
        (vals nil))
    (maphash (lambda (_key val) (push val vals)) hashtbl)
    (setq vals (sort vals (lambda (a b) (> (length a) (length b)))))
    (regexp-opt vals)))
116
(defun kannada-composition-function (from to pattern &optional string)
  "Compose one Kannada syllable and return its length, TO minus FROM.
When STRING is non-nil it is composed with
`kannada-compose-syllable-string'; otherwise the buffer text between
FROM and TO is composed with `kannada-compose-syllable-region'.
The text is assumed to match the composable PATTERN regexp in full;
PATTERN itself is not consulted here."
  (cond (string
         (kannada-compose-syllable-string string))
        (t
         (kannada-compose-syllable-region from to)))
  (- to from))
124
;; Register `kannada-composition-function' for every code point of the
;; range #x0C80..#x0CFF in `composition-function-table'.
(dolist (ucs (kannada-range #x0c80 #x0cff))
  (aset composition-function-table
        (decode-char 'ucs ucs)
        (list (cons kannada-composable-pattern
                    'kannada-composition-function))))
132
133;; Notes on conversion steps.
134
135;; 1. chars to glyphs
136;;
137;; Rules will not be applied to a virama appearing at the end of the
138;; text. Also, a preceding/following "r" is treated as a special case.
139
140;; 2. glyphs reordering.
141;;
142;; The glyphs are split by virama, and each glyph group is
143;; re-ordered in the following order.
144;;
145;; Note that `consonant-glyph' mentioned here does not contain the
146;; vertical bar (right modifier) attached at the right of the
147;; consonant.
148;;
149;; If the glyph-group contains right modifier,
150;; (1) consonant-glyphs/vowels
151;; (2) spacing
152;; (3) right modifier (may be matra)
153;; (4) top matra
154;; (5) preceding "r"
155;; (7) following "r"
156;; (8) bottom matra or virama.
157;;
158;; Otherwise,
159;; (1) consonant-glyph/vowels, with nukta sign
160;; (3) left matra
161;; (4) top matra
162;; (5) preceding "r"
163;; (7) following "r"
164;; (8) bottom matra or virama.
165;; (2) spacing
166
167;; 3. glyph to glyph
168;;
169;; For better display, the rendering of some glyphs is tuned.
170
171;; 4. Composition.
172;;
173;; left modifiers will be attached at the left.
174;; others will be attached right.
175
176;; Problem:
177;; Can we generalize this method to other Indian scripts?
178
;; Each element is (CHARS . GLYPHS): CHARS is a string of one or two
;; Kannada characters and GLYPHS is the string of font glyphs that
;; represents it (nil where no standalone glyph is given).  The table is
;; loaded into `knd-char-glyph-hash' in list order, so a key that occurs
;; more than once takes the value of its last occurrence.
(defvar knd-char-glyph
  '(("\e$,1>e\e(B" . "\e$,43@\e(B")
    ("\e$,1>f\e(B" . "\e$,43A\e(B")
    ("\e$,1?>\e(B" . "\e$,44{\e(B")
    ("\e$,1>g\e(B" . "\e$,43B\e(B")
    ("\e$,1??\e(B" . nil)
    ("\e$,1>h\e(B" . "\e$,43C\e(B")
    ("\e$,1?@\e(B" . nil)
    ("\e$,1>i\e(B" . "\e$,43D\e(B")
    ("\e$,1?A\e(B" . "\\e$,44\7f\e(B")
    ("\e$,1>j\e(B" . "\e$,43E\e(B")
    ("\e$,1?B\e(B" . "\\e$,45 \e(B")
    ("\e$,1>k\e(B" . "\e$,43F4\7f\e(B")
    ("\e$,1?C\e(B" . "\\e$,45$\e(B")
    ("\e$,1?`\e(B" . "\e$,43F5 \e(B")
    ("\e$,1?D\e(B" . "\\e$,45%\e(B")
    ;;("\e$,1>l\e(B" . nil) ; not implemented.
    ;;("\e$,1?a\e(B" . nil)
    ("\e$,1>n\e(B" . "\e$,43G\e(B")
    ("\e$,1>o\e(B" . "\e$,43H\e(B")
    ("\e$,1>p\e(B" . "\e$,43I\e(B")
    ("\e$,1?F\e(B" . "\\e$,45&\e(B")
    ("\e$,1?G\e(B" . "\\e$,45&4~\e(B")
    ("\e$,1?H\e(B" . "\\e$,45&5'\e(B")
    ("\e$,1>r\e(B" . "\e$,43J\e(B")
    ("\e$,1?J\e(B" . "\e$,45&5 \e(B")
    ("\e$,1>s\e(B" . "\e$,43K\e(B")
    ("\e$,1?K\e(B" . "\\e$,45&5 4~\e(B")
    ("\e$,1>t\e(B" . "\e$,43L\e(B")
    ("\e$,1?L\e(B" . "\\e$,45(\e(B")
    ("\e$,1>b\e(B" . "\e$,43M\e(B")
    ("\e$,1>c\e(B" . "\e$,43N\e(B")
    ("\e$,1>u?M\e(B" . "\e$,43O5)\e(B") ("\e$,1>u\e(B" . "\e$,43O\e(B") ("\e$,1>u??\e(B" . "\e$,43P\e(B") ("\e$,1>u?@\e(B" . "\e$,43P4~\e(B")
    ("\e$,1>v?M\e(B" . "\e$,43S5)\e(B") ("\e$,1>v\e(B" . "\e$,43S\e(B") ("\e$,1>v??\e(B" . "\e$,43T\e(B") ("\e$,1>v?@\e(B" . "\e$,43T4~\e(B") ("\e$,1>v?F\e(B" . "\e$,43S5&\e(B") ("\e$,1>v?G\e(B" . "\e$,43S5&4~\e(B") ("\e$,1>v?H\e(B" . "\e$,43S5&5'\e(B") ("\e$,1>v?J\e(B" . "\e$,43S5&5&5 \e(B") ("\e$,1>v?K\e(B" . "\e$,43S5&5&5 4~\e(B") ("\e$,1>v?L\e(B" . "\e$,43S5(\e(B")
    ("\e$,1>w?M\e(B" . "\e$,43V5)\e(B") ("\e$,1>w\e(B" . "\e$,43V\e(B") ("\e$,1>w??\e(B" . "\e$,43W\e(B") ("\e$,1>w?@\e(B" . "\e$,43W4~\e(B")
    ("\e$,1>x?M\e(B" . "\e$,43Y5)\e(B") ("\e$,1>x\e(B" . "\e$,43Y\e(B") ("\e$,1>x??\e(B" . "\e$,43Z\e(B") ("\e$,1>x?@\e(B" . "\e$,43Z4~\e(B")
    ("\e$,1>y?M\e(B" . "\e$,43\5)\e(B") ("\e$,1>y\e(B" . "\e$,43\\e(B")
    ("\e$,1>z?M\e(B" . "\e$,43^5)\e(B") ("\e$,1>z\e(B" . "\e$,43^\e(B") ("\e$,1>z??\e(B" . "\e$,43_\e(B") ("\e$,1>z?@\e(B" . "\e$,43_4~\e(B")
    ("\e$,1>{?M\e(B" . "\e$,43a5)\e(B") ("\e$,1>{\e(B" . "\e$,43a\e(B") ("\e$,1>{??\e(B" . "\e$,43b\e(B") ("\e$,1>{?@\e(B" . "\e$,43b4~\e(B")
    ("\e$,1>|?M\e(B" . "\e$,43d5)\e(B") ("\e$,1>|\e(B" . "\e$,43d\e(B") ("\e$,1>|??\e(B" . "\e$,43f\e(B") ("\e$,1>|?@\e(B" . "\e$,43f4~\e(B") ("\e$,1>|?F\e(B" . "\e$,43e5&\e(B") ("\e$,1>|?G\e(B" . "\e$,43e5&4~\e(B") ("\e$,1>|?H\e(B" . "\e$,43e5&5'\e(B") ("\e$,1>|?J\e(B" . "\e$,43e5&5&5 \e(B") ("\e$,1>|?K\e(B" . "\e$,43e5&5&5 4~\e(B") ("\e$,1>|?L\e(B" . "\e$,43e5(\e(B")
    ("\e$,1>}?M\e(B" . "\e$,44a4z3h4\7f5)\e(B") ("\e$,1>}\e(B" . "\e$,44a4z3h4\7f\e(B") ("\e$,1>}??\e(B" . "\e$,44b3h4\7f\e(B") ("\e$,1>}?@\e(B" . "\e$,44b3h4\7f4~\e(B") ("\e$,1>}?B\e(B". "\e$,44a4z3h5 \e(B") ("\e$,1>}?J\e(B". "\e$,44a5&3h5 \e(B") ("\e$,1>}?K\e(B". "\e$,44a5&3h5 4~\e(B")
    ("\e$,1>~?M\e(B" . "\e$,43j5)\e(B") ("\e$,1>~\e(B" . "\e$,43j\e(B")
    ;; The third entry of this row used to be keyed on the consonant of
    ;; a different row ("\e$,1?#?>\e(B"), which is defined again further
    ;; down and therefore overrode this glyph pair.  It is now keyed on
    ;; this row's own consonant, like the matching entry of every other
    ;; row that has one.
    ("\e$,1>\7f?M\e(B" . "\e$,43m5)\e(B") ("\e$,1>\7f\e(B" . "\e$,43l\e(B") ("\e$,1>\7f?>\e(B" . "\e$,43m4{\e(B") ("\e$,1>\7f??\e(B" . "\e$,43n\e(B") ("\e$,1>\7f?@\e(B" . "\e$,43n4~\e(B") ("\e$,1>\7f?F\e(B" . "\e$,43m5&\e(B") ("\e$,1>\7f?G\e(B" . "\e$,43m5&4~\e(B") ("\e$,1>\7f?H\e(B" . "\e$,43m5&5'\e(B") ("\e$,1>\7f?J\e(B" . "\e$,43m5&5&5 \e(B") ("\e$,1>\7f?K\e(B" . "\e$,43m5&5&5 4~\e(B") ("\e$,1>\7f?L\e(B" . "\e$,43m5(\e(B")
    ("\e$,1? ?M\e(B" . "\e$,43p5)\e(B") ("\e$,1? \e(B" . "\e$,43p\e(B") ("\e$,1? ??\e(B" . "\e$,43q\e(B") ("\e$,1? ?@\e(B" . "\e$,43q4~\e(B")
    ("\e$,1?!?M\e(B" . "\e$,43s5)\e(B") ("\e$,1?!\e(B" . "\e$,43s\e(B") ("\e$,1?!??\e(B" . "\e$,43t\e(B") ("\e$,1?!?@\e(B" . "\e$,43t4~\e(B")
    ("\e$,1?"?M\e(B" . "\e$,43v5)\e(B") ("\e$,1?"\e(B" . "\e$,43v\e(B") ("\e$,1?"??\e(B" . "\e$,43w\e(B") ("\e$,1?"?@\e(B" . "\e$,43w4~\e(B")
    ("\e$,1?#?M\e(B" . "\e$,43z5)\e(B") ("\e$,1?#\e(B" . "\e$,43y\e(B") ("\e$,1?#?>\e(B" . "\e$,43z4{\e(B") ("\e$,1?#??\e(B" . "\e$,43{\e(B") ("\e$,1?#?@\e(B" . "\e$,43{4~\e(B") ("\e$,1?#?F\e(B" . "\e$,43z5&\e(B") ("\e$,1?#?G\e(B" . "\e$,43z5&4~\e(B") ("\e$,1?#?H\e(B" . "\e$,43z5&5'\e(B") ("\e$,1?#?J\e(B" . "\e$,43z5&5&5 \e(B") ("\e$,1?#?K\e(B" . "\e$,43z5&5&5 4~\e(B") ("\e$,1?#?L\e(B" . "\e$,43z5(\e(B")
    ("\e$,1?$?M\e(B" . "\e$,43}5)\e(B") ("\e$,1?$\e(B" . "\e$,43}\e(B") ("\e$,1?$??\e(B" . "\e$,43~\e(B") ("\e$,1?$?@\e(B" . "\e$,43~4~\e(B")
    ("\e$,1?%?M\e(B" . "\e$,44B5)\e(B") ("\e$,1?%\e(B" . "\e$,44B\e(B") ("\e$,1?%??\e(B" . "\e$,44C\e(B") ("\e$,1?%?@\e(B" . "\e$,44C4~\e(B")
    ("\e$,1?&?M\e(B" . "\e$,44E5)\e(B") ("\e$,1?&\e(B" . "\e$,44E\e(B") ("\e$,1?&??\e(B" . "\e$,44F\e(B") ("\e$,1?&?@\e(B" . "\e$,44F4~\e(B")
    ("\e$,1?'?M\e(B" . "\e$,44H5)\e(B") ("\e$,1?'\e(B" . "\e$,44H\e(B") ("\e$,1?'??\e(B" . "\e$,44I\e(B") ("\e$,1?'?@\e(B" . "\e$,44I4~\e(B")
    ("\e$,1?(?M\e(B" . "\e$,44K5)\e(B") ("\e$,1?(\e(B" . "\e$,44K\e(B") ("\e$,1?(??\e(B" . "\e$,44L\e(B") ("\e$,1?(?@\e(B" . "\e$,44L4~\e(B")
    ("\e$,1?*?M\e(B" . "\e$,44N5)\e(B") ("\e$,1?*\e(B" . "\e$,44N\e(B") ("\e$,1?*??\e(B" . "\e$,44O\e(B") ("\e$,1?*?@\e(B" . "\e$,44O4~\e(B") ("\e$,1?*?A\e(B" . "\e$,44N5"\e(B") ("\e$,1?*?B\e(B" . "\e$,44N5#\e(B") ("\e$,1?*?J\e(B" . "\e$,44N5&5#\e(B") ("\e$,1?*?K\e(B" . "\e$,44N5&5#4~\e(B")
    ("\e$,1?+?M\e(B" . "\e$,44Q5)\e(B") ("\e$,1?+\e(B" . "\e$,44Q\e(B") ("\e$,1?+??\e(B" . "\e$,44R\e(B") ("\e$,1?+?@\e(B" . "\e$,44R4~\e(B") ("\e$,1?+?A\e(B" . "\e$,44Q5"\e(B") ("\e$,1?+?B\e(B" . "\e$,44Q5#\e(B") ("\e$,1?+?J\e(B" . "\e$,44Q5&5#\e(B") ("\e$,1?+?K\e(B" . "\e$,44Q5&5#4~\e(B")
    ("\e$,1?,?M\e(B" . "\e$,44W5)\e(B") ("\e$,1?,\e(B" . "\e$,44V\e(B") ("\e$,1?,?>\e(B". "\e$,44W4{\e(B") ("\e$,1?,??\e(B" . "\e$,44X\e(B") ("\e$,1?,?@\e(B" . "\e$,44X4~\e(B") ("\e$,1?,?F\e(B" . "\e$,44W5&\e(B") ("\e$,1?,?G\e(B" . "\e$,44W5&4~\e(B") ("\e$,1?,?H\e(B" . "\e$,44W5&5'\e(B") ("\e$,1?,?J\e(B" . "\e$,44W5&5&5 \e(B") ("\e$,1?,?K\e(B" . "\e$,44W5&5&5 4~\e(B") ("\e$,1?,?L\e(B" . "\e$,44W5(\e(B")
    ("\e$,1?-?M\e(B" . "\e$,44Z5)\e(B") ("\e$,1?-\e(B" . "\e$,44Z\e(B") ("\e$,1?-??\e(B" . "\e$,44[\e(B") ("\e$,1?-?@\e(B" . "\e$,44[4~\e(B")
    ("\e$,1?.?M\e(B" . "\e$,44h5!5)\e(B") ("\e$,1?.\e(B" . "\e$,44h4z4\7f\e(B") ("\e$,1?.?>\e(B" . "\e$,44h4z5!4{\e(B") ("\e$,1?.??\e(B" . "\e$,44i4\7f\e(B") ("\e$,1?.?@\e(B" . "\e$,44i4\7f4~\e(B") ("\e$,1?.?J\e(B". "\e$,44h5&5 \e(B") ("\e$,1?.?K\e(B". "\e$,44h5&5 4~\e(B")
    ("\e$,1?/?M\e(B" . "\e$,44^4z5!5)\e(B") ("\e$,1?/\e(B" . "\e$,44^4z4\7f\e(B") ("\e$,1?/?>\e(B" . "\e$,44^4z5!4{\e(B")("\e$,1?/??\e(B" . "\e$,44_4\7f\e(B") ("\e$,1?/?@\e(B" . "\e$,44_4\7f4~\e(B") ("\e$,1?/?J\e(B" . "\e$,44^5&5 \e(B") ("\e$,1?/?K\e(B" . "\e$,44^5&5 4~\e(B")
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ;; NOTE(review): the next row repeats the previous one verbatim;
    ;; it may have been meant for a different consonant -- confirm.
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ("\e$,1?2?M\e(B" . "\e$,44e5)\e(B") ("\e$,1?2\e(B" . "\e$,44d\e(B") ("\e$,1?2?>\e(B" . "\e$,44e4{\e(B") ("\e$,1?2??\e(B" . "\e$,44f\e(B") ("\e$,1?2?@\e(B" . "\e$,44f4~\e(B") ("\e$,1?2?F\e(B" . "\e$,44e5&\e(B") ("\e$,1?2?G\e(B" . "\e$,44e5&4~\e(B") ("\e$,1?2?H\e(B" . "\e$,44e5&5'\e(B") ("\e$,1?2?J\e(B" . "\e$,44e5&5&5 \e(B") ("\e$,1?2?K\e(B" . "\e$,44e5&5&5 4~\e(B") ("\e$,1?2?L\e(B" . "\e$,44e5(\e(B")
    ("\e$,1?5?M\e(B" . "\e$,44h5)\e(B") ("\e$,1?5\e(B" . "\e$,44h\e(B") ("\e$,1?5??\e(B" . "\e$,44i\e(B") ("\e$,1?5?@\e(B" . "\e$,44i4~\e(B") ("\e$,1?5?A\e(B" . "\e$,44h5"\e(B") ("\e$,1?5?B\e(B" . "\e$,44h5#\e(B") ("\e$,1?5?J\e(B" . "\e$,44h5&5#\e(B") ("\e$,1?5?K\e(B" . "\e$,44h5&5#4~\e(B")
    ("\e$,1?6?M\e(B" . "\e$,44k5)\e(B") ("\e$,1?6\e(B" . "\e$,44k\e(B") ("\e$,1?6??\e(B" . "\e$,44l\e(B") ("\e$,1?6?@\e(B" . "\e$,44l4~\e(B")
    ("\e$,1?7?M\e(B" . "\e$,44n5)\e(B") ("\e$,1?7\e(B" . "\e$,44n\e(B") ("\e$,1?7??\e(B" . "\e$,44o\e(B") ("\e$,1?7?@\e(B" . "\e$,44o4~\e(B")
    ("\e$,1?8?M\e(B" . "\e$,44q5)\e(B") ("\e$,1?8\e(B" . "\e$,44q\e(B") ("\e$,1?8??\e(B" . "\e$,44r\e(B") ("\e$,1?8?@\e(B" . "\e$,44r4~\e(B")
    ("\e$,1?9?M\e(B" . "\e$,44t5)\e(B") ("\e$,1?9\e(B" . "\e$,44t\e(B") ("\e$,1?9??\e(B" . "\e$,44u\e(B") ("\e$,1?9?@\e(B" . "\e$,44u4~\e(B")
    ("\e$,1?3?M\e(B" . "\e$,44w5)\e(B") ("\e$,1?3\e(B" . "\e$,44w\e(B") ("\e$,1?3??\e(B" . "\e$,44x\e(B") ("\e$,1?3?@\e(B" . "\e$,44x4~\e(B"))
  "Kannada characters to glyphs conversion table.
Default value contains only the basic rules.")
248
(defvar knd-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Entries are stored in list order, so a key listed twice in
    ;; `knd-char-glyph' ends up with its last value.
    (dolist (entry knd-char-glyph table)
      (puthash (car entry) (cdr entry) table)))
  "Hash table form of `knd-char-glyph', keyed by character string.")

(defvar knd-char-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
  "Regexp matching any key of `knd-char-glyph-hash'.")
257
;; Each element is (CONSONANT . GLYPH), both one-character strings.
;; `kannada-compose-syllable-region' looks a consonant up here (through
;; `knd-conjunct-glyph-hash') when it is not the first consonant
;; counted in the syllable.
(defvar knd-conjunct-glyph
  '(("\e$,1>u\e(B" . "\e$,43Q\e(B") ("\e$,1>v\e(B" . "\e$,43U\e(B") ("\e$,1>w\e(B" . "\e$,43X\e(B") ("\e$,1>x\e(B" . "\e$,43[\e(B") ("\e$,1>y\e(B" . "\e$,43]\e(B")
    ("\e$,1>z\e(B" . "\e$,43`\e(B") ("\e$,1>{\e(B" . "\e$,43c\e(B") ("\e$,1>|\e(B" . "\e$,43g\e(B") ("\e$,1>}\e(B" . "\e$,43i\e(B") ("\e$,1>~\e(B" . "\e$,43k\e(B")
    ("\e$,1>\7f\e(B" . "\e$,43o\e(B") ("\e$,1? \e(B" . "\e$,43r\e(B") ("\e$,1?!\e(B" . "\e$,43u\e(B") ("\e$,1?"\e(B" . "\e$,43x\e(B") ("\e$,1?#\e(B" . "\e$,43|\e(B")
    ("\e$,1?$\e(B" . "\e$,44A\e(B") ("\e$,1?%\e(B" . "\e$,44D\e(B") ("\e$,1?&\e(B" . "\e$,44G\e(B") ("\e$,1?'\e(B" . "\e$,44J\e(B") ("\e$,1?(\e(B" . "\e$,44M\e(B")
    ("\e$,1?*\e(B" . "\e$,44P\e(B") ("\e$,1?+\e(B" . "\e$,44U\e(B") ("\e$,1?,\e(B" . "\e$,44Y\e(B") ("\e$,1?-\e(B" . "\e$,44\\e(B") ("\e$,1?.\e(B" . "\e$,44]\e(B")
    ("\e$,1?/\e(B" . "\e$,44`\e(B") ("\e$,1?0\e(B" . "\e$,44c\e(B") ("\e$,1?2\e(B" . "\e$,44g\e(B") ("\e$,1?3\e(B" . "\e$,44y\e(B") ("\e$,1?5\e(B" . "\e$,44j\e(B")
    ("\e$,1?6\e(B" . "\e$,44m\e(B") ("\e$,1?7\e(B" . "\e$,44p\e(B") ("\e$,1?8\e(B" . "\e$,44s\e(B") ("\e$,1?9\e(B" . "\e$,44v\e(B"))
  "Kannada characters to conjunct glyphs conversion table.")
267
(defvar knd-conjunct-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-conjunct-glyph table)
      (puthash (car entry) (cdr entry) table)))
  "Hash table form of `knd-conjunct-glyph', keyed by consonant string.")

(defvar knd-conjunct-glyph-regexp
  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
  "Regexp matching any conjunct glyph stored in `knd-conjunct-glyph-hash'.")

;; Give the first character of every conjunct glyph string the
;; reference point (5 . 3).
(dolist (entry knd-conjunct-glyph)
  (let ((glyph (aref (cdr entry) 0)))
    (put-char-code-property glyph 'reference-point '(5 . 3))))
281
282;; glyph-to-glyph conversion table.
283;; it is supposed that glyphs are ordered in
284;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar].
285
(defvar knd-glyph-glyph
  '(("\e$,45$4A\e(B" . "\e$,45*\e(B")
    ("\e$,45'4A\e(B" . "\e$,45+\e(B")
    ("\e$,44A3g\e(B" . "\e$,45,\e(B")
    ("\e$,45$3Q\e(B" . "\e$,45-\e(B"))
  "Glyph-to-glyph conversion table.
Each element is (GLYPHS . REPLACEMENT), both strings.
`kannada-compose-syllable-region' replaces the first occurrence of a
GLYPHS sequence in a syllable's glyph string with REPLACEMENT.")
291
(defvar knd-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-glyph-glyph table)
      (puthash (car entry) (cdr entry) table)))
  "Hash table form of `knd-glyph-glyph', keyed by glyph sequence.")

(defvar knd-glyph-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)
  "Regexp matching any key of `knd-glyph-glyph-hash'.")
299
(defun knd-charseq (from &optional to)
  "Return the list of Kannada glyph characters for codes FROM through TO.
TO defaults to FROM, giving a one-element list.  Every integer in the
range is converted with `indian-glyph-char' for the `kannada' script."
  (let ((codes (kannada-range from (or to from))))
    (mapcar (lambda (code) (indian-glyph-char code 'kannada))
            codes)))
304
;; Glyph codes treated as consonant/vowel/nukta glyphs.  The range
;; #xb6..#xb8 used to be listed twice; the redundant copy is removed,
;; which leaves the set of glyphs unchanged.
;; NOTE(review): #xc3 is also listed in `knd-glyph-bottom-modifier';
;; because that class comes later in `knd-glyph-order', its
;; composition-order is the one that ends up on the glyph.
(defvar knd-glyph-cv
  (append
   (knd-charseq #x40 #x50)
   (knd-charseq #x52 #x54)
   (knd-charseq #x56 #x57)
   (knd-charseq #x59 #x5a)
   (knd-charseq #x5c)
   (knd-charseq #x5e #x5f)
   (knd-charseq #x61 #x62)
   (knd-charseq #x64 #x66)
   (knd-charseq #x6a)
   (knd-charseq #x6c #x6e)
   (knd-charseq #x70 #x71)
   (knd-charseq #x73 #x74)
   (knd-charseq #x76 #x77)
   (knd-charseq #x79 #x7b)
   (knd-charseq #x7d #x7e)
   (knd-charseq #xa2 #xa3)
   (knd-charseq #xa5 #xa6)
   (knd-charseq #xa8 #xa9)
   (knd-charseq #xab #xac)
   (knd-charseq #xae #xaf)
   (knd-charseq #xb1 #xb2)
   (knd-charseq #xb6 #xb8)
   (knd-charseq #xba #xbb)
   (knd-charseq #xbe #xbf)
   (knd-charseq #xc1 #xc2)
   (knd-charseq #xc4 #xc6)
   (knd-charseq #xc8 #xc9)
   (knd-charseq #xcb #xcc)
   (knd-charseq #xce #xcf)
   (knd-charseq #xd1 #xd2)
   (knd-charseq #xd4 #xd5)
   (knd-charseq #xd7 #xd8)
   (knd-charseq #xc3))
  "Kannada Consonants/Vowels/Nukta Glyphs")
342
;; Glyph classes.  Each variable below holds a list of glyph characters
;; built with `knd-charseq'; `knd-glyph-order' assigns every class a
;; composition-order.

(defvar knd-glyph-space
  (knd-charseq #xb3 #xb4)
  "Kannada Spacing Glyphs")

;; Note that #xdd is listed both here and in `knd-glyph-top-matra'.
(defvar knd-glyph-right-modifier
  (append
   (knd-charseq #xdb #xdd)
   (knd-charseq #xdf)
   (knd-charseq #xe0 #xe3)
   (knd-charseq #xe9))
  "Kannada Modifiers attached at the right side.")

;; Bracket expression built from the characters of
;; `knd-glyph-right-modifier' (`concat' accepts a list of characters).
(defvar knd-glyph-right-modifier-regexp
  (concat "[" knd-glyph-right-modifier "]"))

(defvar knd-glyph-jha-tail
  (knd-charseq #x68)
  "Kannada tail for jha.")

(defvar knd-glyph-top-matra
  (append
   (knd-charseq #xda)
   (knd-charseq #xdd)
   (knd-charseq #xe6)
   (knd-charseq #xe8))
  "Kannada Matras attached at the top side.")

(defvar knd-glyph-bottom-matra
  (append
   (knd-charseq #xe4 #xe5)
   (knd-charseq #xe7))
  "Kannada Matras attached at the bottom.")

(defvar knd-glyph-end-marks
  (append
   (knd-charseq #x25)
   (knd-charseq #x4d #x4e)
   (knd-charseq #xde))
  "Kannada end marks: arkavattu, virama, au and diirghaa.")
382
;; Glyph codes of the modifiers drawn below the base glyph.
;; NOTE(review): #xc3 is also listed in `knd-glyph-cv' -- confirm which
;; class it is meant to belong to.
(defvar knd-glyph-bottom-modifier
  (append
   (knd-charseq #x51)
   (knd-charseq #x55)
   (knd-charseq #x58)
   (knd-charseq #x5b)
   (knd-charseq #x5d)
   (knd-charseq #x60)
   (knd-charseq #x63)
   (knd-charseq #x67)
   (knd-charseq #x69)
   (knd-charseq #x6b)
   (knd-charseq #x6f)
   (knd-charseq #x72)
   (knd-charseq #x75)
   (knd-charseq #x78)
   (knd-charseq #x7c)
   (knd-charseq #xa1)
   (knd-charseq #xa4)
   (knd-charseq #xa7)
   (knd-charseq #xaa)
   (knd-charseq #xad)
   (knd-charseq #xb0)
   (knd-charseq #xb5)
   (knd-charseq #xb9)
   (knd-charseq #xbc #xbd)
   (knd-charseq #xc0)
   (knd-charseq #xc3)
   (knd-charseq #xc7)
   (knd-charseq #xca)
   (knd-charseq #xcd)
   (knd-charseq #xd0)
   (knd-charseq #xd3)
   (knd-charseq #xd6)
   (knd-charseq #xd9)
   (knd-charseq #xea #xef))
  "Kannada Modifiers attached at the bottom.")
420
(defvar knd-glyph-order
  `((,knd-glyph-cv . 1)
    (,knd-glyph-top-matra . 2)
    (,knd-glyph-jha-tail . 3)
    (,knd-glyph-right-modifier . 4)
    (,knd-glyph-space . 5)
    (,knd-glyph-bottom-modifier . 5)
    (,knd-glyph-bottom-matra . 6)
    (,knd-glyph-end-marks . 7))
  "Alist of (GLYPH-LIST . ORDER) used to sort the glyphs of a syllable.
ORDER is stored as the `composition-order' property of every glyph in
GLYPH-LIST.")

;; Record the composition-order of every glyph.  The classes are
;; processed in list order, so a glyph that belongs to two classes
;; keeps the order of the later one.
(dolist (group knd-glyph-order)
  (let ((rank (cdr group)))
    (dolist (glyph (car group))
      (put-char-code-property glyph 'composition-order rank))))
439
(defun kannada-compose-syllable-string (string)
  "Return STRING composed as a single Kannada syllable.
STRING is passed through `decompose-string', inserted into a temporary
buffer, and composed there with `kannada-compose-syllable-region'; the
buffer contents are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (kannada-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
445
446;; kch
(defun kannada-compose-syllable-region (from to)
  "Compose kannada syllable in region FROM to TO.
The characters of the region are converted to font glyphs, the glyphs
are reordered by their `composition-order' property, spacing glyphs
are inserted between adjacent bottom glyphs, and the result is passed
to `compose-region' as the components of the composition."
  ;; NOTE(review): `last-modifier' is bound here but never referenced
  ;; in this function.
  (let ((glyph-str nil) (cons-num 0) (glyph-str-list nil)
        (last-virama nil) (preceding-r nil) (last-modifier nil)
        (last-char (char-before to)) match-str pos
        glyph-block split-pos (conj nil) (rest nil))
    (save-excursion
      (save-restriction
        ;;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last virama must be preserved.
        ;; The trailing virama is left out of the narrowed region and
        ;; its glyph is appended again further down.
        (if (eq last-char ?\e$,1?M\e(B)
            (progn
              (setq last-virama t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r virama" must be modifier.
        ;; The two characters are skipped here; their glyph is appended
        ;; after the rest of the syllable has been converted.
        (when (looking-at "\e$,1?0?M\e(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; remove conjunct consonants
        ;; Every consonant after the first one counted is taken out of
        ;; the text and its conjunct glyph is collected in CONJ; one
        ;; virama is dropped from REST for each such consonant.  REST
        ;; accumulates the remaining characters.
        (while (re-search-forward knd-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          (if (and (string-match kannada-consonant match-str)
                   (> cons-num 0))
              (progn
                (setq conj (concat conj (gethash (match-string 0 match-str)
                                                 knd-conjunct-glyph-hash)))
                (setq match-str (replace-match "" t nil match-str))
                (if (string-match "\e$,1?M\e(B" rest)
                    (setq rest (replace-match "" t nil rest)))))
          (setq rest (concat rest match-str))
          ;; count the number of consonant glyphs.
          (if (string-match kannada-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; translate the rest characters into glyphs
        ;; NOTE(review): REST is still nil here if the loop above found
        ;; no match, and `string-match' requires a string -- confirm
        ;; that callers never pass such text.
        (setq pos 0)
        (while (string-match knd-char-glyph-regexp rest pos)
          (setq match-str (match-string 0 rest))
          (setq pos (match-end 0))
          (setq glyph-str
                (concat glyph-str (gethash match-str knd-char-glyph-hash))))

        ;; The conjunct glyphs go after the glyphs of the base text.
        (if conj (setq glyph-str (concat glyph-str conj)))
        ;; Either put the preserved virama glyph back, or, when the
        ;; narrowed text matches `kannada-consonant-needs-twirl',
        ;; append one extra glyph.  The `match-str' assignment in that
        ;; branch is not used afterwards.
        (if last-virama (setq glyph-str (concat glyph-str "\e$,45)\e(B"))
          (goto-char (point-min))
          (if (re-search-forward kannada-consonant-needs-twirl nil t)
              (progn
                (setq match-str (match-string 0))
                (setq glyph-str (concat glyph-str "\e$,44z\e(B")))))
        ;; preceding-r must be attached
        (if preceding-r
            (setq glyph-str (concat glyph-str "\e$,43%\e(B")))
        ;;; *** glyph-to-glyph conversion ***
        ;; Only the first matching glyph sequence is replaced.
        (when (string-match knd-glyph-glyph-regexp glyph-str)
          (setq glyph-str
                (replace-match (gethash (match-string 0 glyph-str)
                                        knd-glyph-glyph-hash)
                               nil t glyph-str)))
        ;;; *** glyph reordering ***
        ;; Cut GLYPH-STR after each virama glyph (or after its last
        ;; character), sort every piece by `composition-order', and
        ;; collect the pieces in GLYPH-STR-LIST.  The loop ends once
        ;; GLYPH-STR is empty.
        (while (setq split-pos (string-match "\e$,45)\e(B\\|.$" glyph-str))
          (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
          (setq glyph-str (substring glyph-str (1+ split-pos)))
          (setq
           glyph-block
           (sort (string-to-list glyph-block)
                 (function (lambda (x y)
                             (< (get-char-code-property x 'composition-order)
                                (get-char-code-property y 'composition-order))))))
          (setq glyph-str-list (nconc glyph-str-list glyph-block)))
        ;;; *** insert space glyphs for kerning ***
        ;; A spacing glyph is spliced in between two consecutive glyphs
        ;; whose composition-order is 5 or 6.
        (if (> cons-num 0)
            (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co)
              (while curr
                (setq co (get-char-code-property
                          (car curr) 'composition-order)
                      bott (or (eq co 5) (eq co 6)))
                (if (and bott last-bott)
                    (setcdr prev (cons ?\e$,44T\e(B curr)))
                (setq last-bott bott prev curr curr (cdr curr)))))
        ;; concatenate and attach reference-points.
        ;; The list built here alternates reference point and glyph;
        ;; `cdr' drops the leading reference point so that the result
        ;; starts with a glyph.  From here on GLYPH-STR is a list.
        (setq glyph-str
              (cdr
               (apply
                'nconc
                (mapcar
                 (function (lambda (x)
                             (list
                              (or (get-char-code-property x 'reference-point)
                                  '(5 . 3) ;; default reference point.
                                  )
                              x)))
                 glyph-str-list))))))
    (compose-region from to glyph-str)))
541
542(provide 'knd-util)
543
e4eaf898 544;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc
75b62c63 545;;; knd-util.el ends here