Commit | Line | Data |
---|---|---|
585eb076 KH |
1 | ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*- |
2 | ||
3 | ;; Copyright (C) 2001 Free Software Foundation, Inc. | |
4 | ||
5 | ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> | |
6 | ;; Keywords: multilingual, Indian, Tamil | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
24 | ||
25 | ;; Created: Nov. 08. 2002 | |
26 | ||
27 | ;;; Commentary: | |
28 | ||
29 | ;; This file provides character(Unicode) to glyph(CDAC) conversion and | |
30 | ;; composition of Tamil script characters. | |
31 | ||
32 | ;;; Code: | |
33 | ||
34 | ;; Tamil Composable Pattern | |
35 | ;; C .. Consonants | |
36 | ;; V .. Vowel | |
37 | ;; H .. Pulli | |
38 | ;; M .. Matra | |
39 | ;; V .. Vowel | |
40 | ;; A .. Anuswar | |
41 | ;; D .. Chandrabindu | |
42 | ;; 1. vowel | |
43 | ;; V | |
44 | ;; 2. syllable : only ligature-formed pattern forms composition. | |
45 | ;; (CkHCs|C)(H|M)? | |
46 | ;; 3. sri special | |
47 | ;; (CsHCrVi) | |
48 | ||
49 | ;; original | |
50 | ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? | |
51 | ||
;; Regexp character class spanning the Unicode Tamil consonants, i.e.
;; the range from the first to the last character listed under
;; "Consonants" in `tml-char-glyph'.
(defconst tamil-consonant
  "[\e$,1<5\e(B-\e$,1<Y\e(B]")
54 | ||
;; The pattern is an alternation of four cases, tried in this order:
;;   1. a single independent vowel (group 1);
;;   2. a single sign from the "various signs" at the head of
;;      `tml-char-glyph', treated as a unit of its own;
;;   3. a syllable (group 2): either the "ks" conjunct or one consonant,
;;      optionally followed by a pulli or one dependent vowel sign;
;;   4. the special "sri" sequence (group 3).
(defconst tamil-composable-pattern
  (concat
   ;; 1. independent vowel
   "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
   "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
   ;; 3. (ks | consonant) followed by an optional pulli / vowel sign
   "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
   ;; 4. sri special
   "\\(\e$,1<W<m<P<`\e(B\\)")
  "Regexp matching a composable sequence of Tamil characters.")
62 | ||
63 | ;;;###autoload | |
(defun tamil-compose-region (from to)
  "Compose the Tamil text between FROM and TO.
Every stretch of text matching `tamil-composable-pattern' is passed to
`tamil-compose-syllable-region'.  Point and the buffer restriction are
restored afterwards.  When called interactively, operate on the
region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (syllable-start syllable-end)
        (while (re-search-forward tamil-composable-pattern nil t)
          (setq syllable-start (match-beginning 0)
                syllable-end (match-end 0))
          (tamil-compose-syllable-region syllable-start syllable-end))))))
(defun tamil-compose-string (string)
  "Return the text of STRING with its Tamil sequences composed.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer, composed there with
`tamil-compose-region', and the buffer contents are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (tamil-compose-region (point-min) (point-max))
      (buffer-string))))
78 | ||
6b61353c | 79 | ;;;###autoload |
585eb076 KH |
(defun tamil-post-read-conversion (len)
  "Compose the Tamil text in the LEN characters following point.
The buffer's modified flag is set back to the value it had on entry.
Return the size of the processed text."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (tamil-compose-region (point-min) (point-max))
        ;; Composing must not mark the freshly read text as edited.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
88 | ||
(defun tamil-range (from to)
  "Return the list of integers FROM, FROM+1, ..., TO.
Both ends are included.  Return nil when FROM is greater than TO."
  (let ((n to)
        (numbers nil))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq numbers (cons n numbers)
            n (1- n)))
    numbers))
93 | ||
(defun tamil-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression matching any key of hash table HASHTBL.
The keys are collected, ordered longest first, and handed to
`regexp-opt'."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _value)
               (setq keys (cons key keys)))
             hashtbl)
    (regexp-opt
     (sort keys
           (lambda (a b) (> (length a) (length b)))))))
103 | ||
104 | ||
585eb076 KH |
105 | ;; Notes on conversion steps. |
106 | ||
107 | ;; 1. chars to glyphs | |
108 | ;; Simple replacement of characters to glyphs is done. | |
109 | ||
110 | ;; 2. glyphs reordering. | |
111 | ;; the glyphs "\e$,4)j\e(B", "\e$,4)k\e(B", "\e$,4)l\e(B" that follow a consonant are moved in front of it. | |
112 | ||
113 | ;; 3. glyphs to glyphs | |
114 | ;; reordered vowels are ligatured to consonants. | |
115 | ||
116 | ;; 4. Composition. | |
117 | ;; left modifiers will be attached at the left. | |
118 | ;; others will be attached right. | |
119 | ||
;; Alist of (CHARS . GLYPHS): CHARS is a Tamil character sequence and
;; GLYPHS the glyph string that replaces it in the chars-to-glyphs
;; step.  `tml-char-glyph-hash' is built from this list.
(defvar tml-char-glyph
  '(;; various signs
    ("\e$,1<"\e(B" . "\e$,4)b\e(B") ;; not good
    ("\e$,1<#\e(B" . "\e$,4*G\e(B")
    ;; Independent Vowels
    ("\e$,1<%\e(B" . "\e$,4*<\e(B")
    ("\e$,1<&\e(B" . "\e$,4*=\e(B")
    ("\e$,1<'\e(B" . "\e$,4*>\e(B")
    ("\e$,1<(\e(B" . "\e$,4*?\e(B")
    ("\e$,1<)\e(B" . "\e$,4*@\e(B")
    ("\e$,1<*\e(B" . "\e$,4*A\e(B")
    ("\e$,1<.\e(B" . "\e$,4*B\e(B")
    ("\e$,1</\e(B" . "\e$,4*C\e(B")
    ("\e$,1<0\e(B" . "\e$,4*D\e(B")
    ("\e$,1<2\e(B" . "\e$,4*E\e(B")
    ("\e$,1<3\e(B" . "\e$,4*F\e(B")
    ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
    ;; Consonants
    ;; (multi-character keys are conjuncts or consonant + pulli forms
    ;; that have a dedicated glyph)
    ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
    ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
    ("\e$,1<5\e(B" . "\e$,4*H\e(B")

    ("\e$,1<9\e(B" . "\e$,4*I\e(B")
    ("\e$,1<:\e(B" . "\e$,4*J\e(B")
    ("\e$,1<<\e(B" . "\e$,4*\\e(B")
    ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
    ("\e$,1<>\e(B" . "\e$,4*K\e(B")
    ("\e$,1<?\e(B" . "\e$,4*L\e(B")
    ("\e$,1<C\e(B" . "\e$,4*M\e(B")
    ("\e$,1<D\e(B" . "\e$,4*N\e(B")
    ("\e$,1<H\e(B" . "\e$,4*O\e(B")
    ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
    ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
    ("\e$,1<J\e(B" . "\e$,4*P\e(B")
    ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
    ("\e$,1<O\e(B" . "\e$,4*R\e(B")
    ("\e$,1<P\e(B" . "\e$,4*S\e(B")
    ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
    ("\e$,1<R\e(B" . "\e$,4*T\e(B")
    ("\e$,1<S\e(B" . "\e$,4*W\e(B")
    ("\e$,1<T\e(B" . "\e$,4*V\e(B")
    ("\e$,1<U\e(B" . "\e$,4*U\e(B")
    ("\e$,1<W\e(B" . "\e$,4*[\e(B")
    ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
    ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
    ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
    ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
    ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
    ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")

    ;; Dependent vowel signs
    ;; (some signs map to two glyphs)
    ("\e$,1<^\e(B" . "\e$,4)c\e(B")
    ("\e$,1<_\e(B" . "\e$,4)d\e(B")
    ("\e$,1<`\e(B" . "\e$,4)f\e(B")
    ("\e$,1<a\e(B" . "\e$,4)g\e(B")
    ("\e$,1<b\e(B" . "\e$,4)h\e(B")
    ("\e$,1<f\e(B" . "\e$,4)j\e(B")
    ("\e$,1<g\e(B" . "\e$,4)k\e(B")
    ("\e$,1<h\e(B" . "\e$,4)l\e(B")
    ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
    ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
    ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")

    ;; Various signs
    ("\e$,1<m\e(B" . "\e$,4)b\e(B")
    ;; NOTE(review): the value below is the three-letter string "nil",
    ;; not the symbol nil -- confirm that this is intended.
    ("\e$,1<w\e(B" . "nil") ;; not supported?
    ))
187 | ||
;; Hash table form of `tml-char-glyph': each key is a character
;; sequence, each value the glyph string it converts to.  Keys are
;; strings, hence the `equal' test.
(defvar tml-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (pair tml-char-glyph)
      (puthash (car pair) (cdr pair) table))
    table))
193 | ||
;; Regexp matching any character sequence that is a key of
;; `tml-char-glyph-hash'; used to find the next convertible sequence
;; during the chars-to-glyphs step.
(defvar tml-char-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
196 | ||
197 | ;; Tamil glyphs need to be reordered. | |
198 | ||
;; Character class of the consonant glyphs: the glyphs that
;; `tml-char-glyph' assigns to the bare consonants and to the "ks"
;; conjunct.
(defvar tml-consonants-regexp
  "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
201 | ||
;; Character class of the three vowel-sign glyphs whose presence in a
;; glyph string triggers the reordering step.
(defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
203 | ||
;; Despite the name this is a single cons (REGEXP . REPLACEMENT).
;; REGEXP matches a consonant glyph (group 1) followed by one of the
;; reorder-key glyphs (group 2); REPLACEMENT swaps the two groups so
;; that the vowel glyph comes first.
(defvar tml-glyph-reordering-regexp-list
  (cons
   (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
207 | ||
;; Tamil vowel modifiers to be ligatured.
;; Alist of (GLYPH-PAIR . LIGATURE): GLYPH-PAIR is a consonant glyph
;; followed by a vowel-sign glyph, LIGATURE the single glyph that
;; replaces the pair.  The trailing comment on each row gives the
;; transliterated syllable.  `tml-glyph-glyph-hash' is built from
;; this list.
(defvar tml-glyph-glyph
  '(
    ;; consonant + i
    ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
    ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
    ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
    ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
    ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
    ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
    ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
    ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
    ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
    ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
    ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
    ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
    ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
    ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
    ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
    ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
    ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
    ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
    ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
    ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
    ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
    ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
    ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
    ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi

    ;; consonant + i~
    ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
    ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
    ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
    ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
    ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
    ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
    ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
    ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
    ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
    ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
    ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
    ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
    ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
    ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
    ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
    ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
    ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
    ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
    ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
    ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
    ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
    ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~

    ;; consonant + u
    ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
    ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
    ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
    ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
    ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
    ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
    ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
    ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
    ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
    ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
    ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
    ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
    ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
    ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
    ("\e$,4*V)g\e(B" . "\e$,4)\7f\e(B") ; l_u
    ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
    ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
    ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u

    ;; consonant + u~
    ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
    ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
    ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
    ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
    ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
    ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
    ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
    ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
    ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
    ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
    ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
    ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
    ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
    ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
    ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
    ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
    ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
    ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
    ))
297 | ||
;; Hash table form of `tml-glyph-glyph': each key is a glyph pair,
;; each value the ligature glyph that replaces it.  Keys are strings,
;; hence the `equal' test.
(defvar tml-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (pair tml-glyph-glyph)
      (puthash (car pair) (cdr pair) table))
    table))
303 | ||
;; Regexp matching any glyph pair that is a key of
;; `tml-glyph-glyph-hash'; used to find a pair to ligature during the
;; glyphs-to-glyphs step.
(defvar tml-glyph-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
306 | ||
(defun tamil-compose-syllable-string (string)
  "Return the text of STRING composed as one Tamil syllable.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer, the whole buffer is composed with
`tamil-compose-syllable-region', and the buffer contents are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (tamil-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
312 | ||
(defun tamil-compose-syllable-region (from to)
  "Compose the Tamil syllable in the region FROM to TO.
The text of the region is turned into a glyph string in three steps,
and the region is then composed with those glyphs:
 1. every character sequence matched by `tml-char-glyph-regexp' is
    replaced by its entry in `tml-char-glyph-hash'; any other
    character is copied unchanged;
 2. if the glyph string contains one of
    `tml-glyph-reorder-key-glyphs', the first consonant glyph that is
    followed by such a glyph is swapped with it;
 3. the first glyph pair matched by `tml-glyph-glyph-regexp' is
    replaced by its ligature from `tml-glyph-glyph-hash';
 4. `compose-region' is called on FROM..TO with the glyphs, each
    glyph after the first attached by the rule (5 . 3).
Do nothing and return nil when the region is empty."
  ;; With an empty region no glyph string is built, and the string
  ;; operations below would signal an error on nil, so bail out early.
  (unless (= from to)
    (let (glyph-str match-str)
      (save-excursion
        (save-restriction
          (narrow-to-region from to)
          (goto-char (point-min))
          ;; Step 1: char-glyph conversion.
          (while (not (eobp))
            (if (looking-at tml-char-glyph-regexp)
                (progn
                  (setq match-str (match-string 0)
                        glyph-str
                        (concat glyph-str
                                (gethash match-str tml-char-glyph-hash)))
                  (goto-char (match-end 0)))
              ;; No table entry: keep the character itself.
              (setq glyph-str (concat glyph-str (string (following-char))))
              (forward-char 1)))
          ;; Step 2: glyph reordering.  The replacement is the "\\2\\1"
          ;; template, so it must not be inserted literally; FIXEDCASE
          ;; is t because glyph data must never be case-converted.
          (when (and (string-match tml-glyph-reorder-key-glyphs glyph-str)
                     (string-match (car tml-glyph-reordering-regexp-list)
                                   glyph-str))
            (setq glyph-str
                  (replace-match (cdr tml-glyph-reordering-regexp-list)
                                 t nil glyph-str)))
          ;; Step 3: glyph-glyph conversion.  The ligature is plain
          ;; data, so insert it literally and without case conversion.
          (when (string-match tml-glyph-glyph-regexp glyph-str)
            (setq match-str (match-string 0 glyph-str))
            (setq glyph-str
                  (replace-match (gethash match-str tml-glyph-glyph-hash)
                                 t t glyph-str)))
          ;; Step 4: build (GLYPH RULE GLYPH RULE ... GLYPH).  Each
          ;; glyph is first paired with a leading rule; `cdr' then
          ;; drops the rule in front of the first glyph.
          (compose-region
           from to
           (cdr
            (apply 'nconc
                   (mapcar
                    (lambda (glyph)
                      (list '(5 . 3) glyph)) ;; default ref. point.
                    glyph-str)))))))))
356 | ||
8f924df7 KH |
357 | ;;;###autoload |
(defun tamil-composition-function (pos &optional string)
  "Compose Tamil characters after the position POS.
If STRING is not nil, it is a string, and POS is an index to the string.
Composing inside a string is not yet implemented; nil is returned in
that case.
Otherwise move to POS and, if the text there matches
`tamil-composable-pattern', compose the match and return the position
of its end.  Return nil when nothing matches."
  (unless string
    (goto-char pos)
    (when (looking-at tamil-composable-pattern)
      (let ((syllable-end (match-end 0)))
        (tamil-compose-syllable-region pos syllable-end)
        syllable-end))))
369 | ||
585eb076 KH |
;; Announce the feature so that (require 'tml-util) finds this file loaded.
(provide 'tml-util)
371 | ||
6b61353c | 372 | ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e |
585eb076 | 373 | ;;; tml-util.el ends here |