*** empty log message ***
[bpt/emacs.git] / lisp / language / tml-util.el
1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
2
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
4
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;; Created: Nov. 08. 2002
26
27 ;;; Commentary:
28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
31
32 ;;; Code:
33
34 ;; Tamil Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Pulli
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; 1. vowel
43 ;; V
44 ;; 2. syllable : only ligature-formed pattern forms composition.
45 ;; (CkHCs|C)(H|M)?
46 ;; 3. sri special
47 ;; (CsHCrVi)
48
;; original
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
51
(defconst tamil-consonant
  "[\e$,1<5\e(B-\e$,1<Y\e(B]"
  "Regexp matching a single Tamil consonant character.")
54
(defconst tamil-composable-pattern
  (concat
   ;; 1. A vowel standing on its own.
   "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
   ;; Vowel modifiers are treated as independent units.
   "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
   ;; 2. A syllable: a consonant (or the ligature-forming consonant
   ;;    cluster), optionally followed by one pulli or matra.
   "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
   ;; 3. The special "sri" sequence.
   "\\(\e$,1<W<m<P<`\e(B\\)")
  "Regexp matching a composable sequence of Tamil characters.
The alternatives are, in order: a vowel, a vowel modifier, a syllable
made of a consonant (or consonant cluster) with an optional following
sign, and the special sri sequence.")
62
;;;###autoload
(defun tamil-compose-region (from to)
  "Compose every composable Tamil sequence between FROM and TO.
The region is scanned with `tamil-composable-pattern' and each match
is handed to `tamil-compose-syllable-region'.
Interactively, operate on the current region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (start end)
        (while (re-search-forward tamil-composable-pattern nil t)
          (setq start (match-beginning 0)
                end (match-end 0))
          (tamil-compose-syllable-region start end))))))
(defun tamil-compose-string (string)
  "Return STRING with its Tamil sequences composed.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer and composed there with
`tamil-compose-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (tamil-compose-region (point-min) (point-max))
      (buffer-string))))
78
(defun tamil-post-read-conversion (len)
  "Compose the LEN characters following point as Tamil text.
The buffer's modified flag is put back to the value it had on entry.
Return the length of the converted region."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (tamil-compose-region (point-min) (point-max))
        ;; Composing must not make the buffer look edited.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
87
(defun tamil-range (from to)
  "Return the list of integers from FROM up to TO, inclusive.
The result is nil when FROM is greater than TO."
  (let ((n to)
        (numbers nil))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq numbers (cons n numbers)
            n (1- n)))
    numbers))
92
(defun tamil-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys must be strings.  They are collected, ordered from the
longest to the shortest, and handed to `regexp-opt'."
  ;; Give `regexp-opt' at least 1000 specpdl entries, but never lower
  ;; a limit that is already higher than that.
  (let ((max-specpdl-size (max 1000 max-specpdl-size))
        keys)
    (maphash (function (lambda (key val) (setq keys (cons key keys))))
             hashtbl)
    (regexp-opt
     (sort keys
           (function (lambda (x y) (> (length x) (length y))))))))
102
103
104 ;; Notes on conversion steps.
105
106 ;; 1. chars to glyphs
107 ;; Simple replacement of characters to glyphs is done.
108
109 ;; 2. glyphs reordering.
;; The glyphs "\e$,4)j\e(B", "\e$,4)k\e(B" and "\e$,4)l\e(B" are moved in front of the consonant they follow.
111
112 ;; 3. glyphs to glyphs
113 ;; reordered vowels are ligatured to consonants.
114
115 ;; 4. Composition.
116 ;; left modifiers will be attached at the left.
117 ;; others will be attached right.
118
(defvar tml-char-glyph
  '(;; various signs
    ("\e$,1<"\e(B" . "\e$,4)b\e(B") ;; not good
    ("\e$,1<#\e(B" . "\e$,4*G\e(B")
    ;; Independent Vowels
    ("\e$,1<%\e(B" . "\e$,4*<\e(B")
    ("\e$,1<&\e(B" . "\e$,4*=\e(B")
    ("\e$,1<'\e(B" . "\e$,4*>\e(B")
    ("\e$,1<(\e(B" . "\e$,4*?\e(B")
    ("\e$,1<)\e(B" . "\e$,4*@\e(B")
    ("\e$,1<*\e(B" . "\e$,4*A\e(B")
    ("\e$,1<.\e(B" . "\e$,4*B\e(B")
    ("\e$,1</\e(B" . "\e$,4*C\e(B")
    ("\e$,1<0\e(B" . "\e$,4*D\e(B")
    ("\e$,1<2\e(B" . "\e$,4*E\e(B")
    ("\e$,1<3\e(B" . "\e$,4*F\e(B")
    ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
    ;; Consonants
    ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
    ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
    ("\e$,1<5\e(B" . "\e$,4*H\e(B")

    ("\e$,1<9\e(B" . "\e$,4*I\e(B")
    ("\e$,1<:\e(B" . "\e$,4*J\e(B")
    ("\e$,1<<\e(B" . "\e$,4*\\e(B")
    ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
    ("\e$,1<>\e(B" . "\e$,4*K\e(B")
    ("\e$,1<?\e(B" . "\e$,4*L\e(B")
    ("\e$,1<C\e(B" . "\e$,4*M\e(B")
    ("\e$,1<D\e(B" . "\e$,4*N\e(B")
    ("\e$,1<H\e(B" . "\e$,4*O\e(B")
    ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
    ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
    ("\e$,1<J\e(B" . "\e$,4*P\e(B")
    ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
    ("\e$,1<O\e(B" . "\e$,4*R\e(B")
    ("\e$,1<P\e(B" . "\e$,4*S\e(B")
    ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
    ("\e$,1<R\e(B" . "\e$,4*T\e(B")
    ("\e$,1<S\e(B" . "\e$,4*W\e(B")
    ("\e$,1<T\e(B" . "\e$,4*V\e(B")
    ("\e$,1<U\e(B" . "\e$,4*U\e(B")
    ("\e$,1<W\e(B" . "\e$,4*[\e(B")
    ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
    ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
    ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
    ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
    ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
    ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")

    ;; Dependent vowel signs
    ("\e$,1<^\e(B" . "\e$,4)c\e(B")
    ("\e$,1<_\e(B" . "\e$,4)d\e(B")
    ("\e$,1<`\e(B" . "\e$,4)f\e(B")
    ("\e$,1<a\e(B" . "\e$,4)g\e(B")
    ("\e$,1<b\e(B" . "\e$,4)h\e(B")
    ("\e$,1<f\e(B" . "\e$,4)j\e(B")
    ("\e$,1<g\e(B" . "\e$,4)k\e(B")
    ("\e$,1<h\e(B" . "\e$,4)l\e(B")
    ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
    ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
    ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")

    ;; Various signs
    ("\e$,1<m\e(B" . "\e$,4)b\e(B")
    ;; The unsupported sign "\e$,1<w\e(B" deliberately has no entry.  It used
    ;; to be mapped to the literal text "nil", which put the letters
    ;; n, i and l into the glyph string.  Without an entry the
    ;; character is not matched by the key regexp and is therefore
    ;; passed through unchanged.
    )
  "Alist mapping Tamil character sequences to glyph strings.
Each element has the form (CHARS . GLYPHS), where CHARS is a string of
one or more Tamil characters and GLYPHS is the string of glyphs that
replaces it during composition.")
186
(defvar tml-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Copy every (CHARS . GLYPHS) pair of the alist into the table.
    (dolist (entry tml-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `tml-char-glyph'.
Keys are character strings, values are the corresponding glyph strings.")
192
(defvar tml-char-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash)
  "Regexp matching any key of `tml-char-glyph-hash'.")
195
;; Tamil glyphs need to be reordered.
197
(defvar tml-consonants-regexp
  "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]"
  "Regexp matching a single consonant glyph.")
200
(defvar tml-glyph-reorder-key-glyphs
  "[\e$,4)j)k)l\e(B]"
  "Regexp matching the glyphs whose presence triggers glyph reordering.")
202
(defvar tml-glyph-reordering-regexp-list
  (let ((pattern
         (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)"))
        (replacement "\\2\\1"))
    (cons pattern replacement))
  "Cons cell (REGEXP . REPLACEMENT) used for glyph reordering.
REGEXP matches a consonant glyph followed by one of the reordered
vowel glyphs, each in its own group; REPLACEMENT swaps the two groups.")
206
207 ;; Tamil vowel modifiers to be ligatured.
(defvar tml-glyph-glyph
  '(
    ;; Ligatures transliterated with a final "i" (plus one "ksi~").
    ("\e$,4*H)d\e(B" . "\e$,4(a\e(B")	; ki
    ("\e$,4*^)d\e(B" . "\e$,4(v\e(B")	; ksi
    ("\e$,4*^)f\e(B" . "\e$,4)2\e(B")	; ksi~
    ("\e$,4*I)d\e(B" . "\e$,4(b\e(B")	; n^i
    ("\e$,4*J)d\e(B" . "\e$,4(c\e(B")	; ci
    ("\e$,4*K)d\e(B" . "\e$,4(d\e(B")	; n~i
    ("\e$,4*L)d\e(B" . "\e$,4)n\e(B")	; t.i
    ("\e$,4*M)d\e(B" . "\e$,4(e\e(B")	; n.i
    ("\e$,4*N)d\e(B" . "\e$,4(f\e(B")	; ti
    ("\e$,4*O)d\e(B" . "\e$,4(g\e(B")	; ni
    ("\e$,4*P)d\e(B" . "\e$,4(h\e(B")	; pi
    ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B")	; mi
    ("\e$,4*R)d\e(B" . "\e$,4(j\e(B")	; yi
    ("\e$,4*S)d\e(B" . "\e$,4(k\e(B")	; ri
    ("\e$,4*T)d\e(B" . "\e$,4(l\e(B")	; li
    ("\e$,4*U)d\e(B" . "\e$,4(m\e(B")	; vi
    ("\e$,4*V)d\e(B" . "\e$,4(n\e(B")	; l_i
    ("\e$,4*W)d\e(B" . "\e$,4(o\e(B")	; l.i
    ("\e$,4*X)d\e(B" . "\e$,4(p\e(B")	; r_i
    ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B")	; n_i
    ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B")	; si
    ("\e$,4*[)d\e(B" . "\e$,4(s\e(B")	; s'i
    ("\e$,4*\)d\e(B" . "\e$,4(t\e(B")	; ji
    ("\e$,4*])d\e(B" . "\e$,4(u\e(B")	; hi

    ;; Ligatures transliterated with a final "i~".
    ("\e$,4*H)f\e(B" . "\e$,4(w\e(B")	; ki~
    ("\e$,4*I)f\e(B" . "\e$,4(x\e(B")	; n^i~
    ("\e$,4*J)f\e(B" . "\e$,4(y\e(B")	; ci~
    ("\e$,4*K)f\e(B" . "\e$,4(z\e(B")	; n~i~
    ("\e$,4*L)f\e(B" . "\e$,4)o\e(B")	; t.i~
    ("\e$,4*M)f\e(B" . "\e$,4)!\e(B")	; n.i~
    ("\e$,4*N)f\e(B" . "\e$,4)"\e(B")	; ti~
    ("\e$,4*O)f\e(B" . "\e$,4)#\e(B")	; ni~
    ("\e$,4*P)f\e(B" . "\e$,4)$\e(B")	; pi~
    ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B")	; mi~
    ("\e$,4*R)f\e(B" . "\e$,4)&\e(B")	; yi~
    ("\e$,4*S)f\e(B" . "\e$,4)'\e(B")	; ri~
    ("\e$,4*T)f\e(B" . "\e$,4)(\e(B")	; li~
    ("\e$,4*U)f\e(B" . "\e$,4))\e(B")	; vi~
    ("\e$,4*V)f\e(B" . "\e$,4)*\e(B")	; l_i~
    ("\e$,4*W)f\e(B" . "\e$,4)+\e(B")	; l.i~
    ("\e$,4*X)f\e(B" . "\e$,4),\e(B")	; r_i~
    ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B")	; n_i~
    ("\e$,4*Z)f\e(B" . "\e$,4).\e(B")	; si~
    ("\e$,4*[)f\e(B" . "\e$,4)/\e(B")	; s'i~
    ("\e$,4*\)f\e(B" . "\e$,4)0\e(B")	; ji~
    ("\e$,4*])f\e(B" . "\e$,4)1\e(B")	; hi~

    ;; Ligatures transliterated with a final "u".
    ("\e$,4*H)g\e(B" . "\e$,4)p\e(B")	; ku
    ("\e$,4*I)g\e(B" . "\e$,4)q\e(B")	; n^u
    ("\e$,4*J)g\e(B" . "\e$,4)r\e(B")	; cu
    ("\e$,4*K)g\e(B" . "\e$,4)s\e(B")	; n~u
    ("\e$,4*L)g\e(B" . "\e$,4)t\e(B")	; t.u
    ("\e$,4*M)g\e(B" . "\e$,4)u\e(B")	; n.u
    ("\e$,4*N)g\e(B" . "\e$,4)v\e(B")	; tu
    ("\e$,4*O)g\e(B" . "\e$,4)x\e(B")	; nu
    ("\e$,4*P)g\e(B" . "\e$,4)y\e(B")	; pu
    ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B")	; mu
    ("\e$,4*R)g\e(B" . "\e$,4){\e(B")	; yu
    ("\e$,4*S)g\e(B" . "\e$,4)|\e(B")	; ru
    ("\e$,4*T)g\e(B" . "\e$,4)}\e(B")	; lu
    ("\e$,4*U)g\e(B" . "\e$,4)~\e(B")	; vu
    ("\e$,4*V)g\e(B" . "\e$,4)\7f\e(B")	; l_u
    ("\e$,4*W)g\e(B" . "\e$,4* \e(B")	; l.u
    ("\e$,4*X)g\e(B" . "\e$,4*!\e(B")	; r_u
    ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B")	; n_u

    ;; Ligatures transliterated with a final "u~".
    ("\e$,4*H)h\e(B" . "\e$,4*#\e(B")	; ku~
    ("\e$,4*I)h\e(B" . "\e$,4*$\e(B")	; n^u~
    ("\e$,4*J)h\e(B" . "\e$,4*%\e(B")	; cu~
    ("\e$,4*K)h\e(B" . "\e$,4*&\e(B")	; n~u~
    ("\e$,4*L)h\e(B" . "\e$,4*'\e(B")	; t.u~
    ("\e$,4*M)h\e(B" . "\e$,4*(\e(B")	; n.u~
    ("\e$,4*N)h\e(B" . "\e$,4*)\e(B")	; tu~
    ("\e$,4*O)h\e(B" . "\e$,4*+\e(B")	; nu~
    ("\e$,4*P)h\e(B" . "\e$,4*,\e(B")	; pu~
    ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B")	; mu~
    ("\e$,4*R)h\e(B" . "\e$,4*.\e(B")	; yu~
    ("\e$,4*S)h\e(B" . "\e$,4*/\e(B")	; ru~
    ("\e$,4*T)h\e(B" . "\e$,4*6\e(B")	; lu~
    ("\e$,4*U)h\e(B" . "\e$,4*7\e(B")	; vu~
    ("\e$,4*V)h\e(B" . "\e$,4*8\e(B")	; l_u~
    ("\e$,4*W)h\e(B" . "\e$,4*9\e(B")	; l.u~
    ("\e$,4*X)h\e(B" . "\e$,4*:\e(B")	; r_u~
    ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B")	; n_u~
    )
  "Alist mapping glyph pairs to their ligature glyph.
Each element has the form (GLYPHS . LIGATURE): GLYPHS is a consonant
glyph followed by a vowel-sign glyph, and LIGATURE is the single glyph
that replaces the pair.")
296
(defvar tml-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Copy every (GLYPHS . LIGATURE) pair of the alist into the table.
    (dolist (entry tml-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table built from `tml-glyph-glyph'.
Keys are glyph-pair strings, values are the ligature glyph strings.")
302
(defvar tml-glyph-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash)
  "Regexp matching any key of `tml-glyph-glyph-hash'.")
305
(defun tamil-compose-syllable-string (string)
  "Return STRING composed as one Tamil syllable.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer and composed there as a whole with
`tamil-compose-syllable-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (tamil-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
311
(defun tamil-compose-syllable-region (from to)
  "Compose tamil syllable in region FROM to TO.
The characters in the region are converted to glyphs through
`tml-char-glyph-hash' (characters without an entry are kept as they
are).  A reordered vowel glyph is then moved in front of its
consonant, the first consonant+vowel glyph pair found in
`tml-glyph-glyph-hash' is replaced by its ligature, and the resulting
glyphs become the components of a composition covering the region.
An empty region is left untouched."
  (let (glyph-str match-str)
    (save-excursion
      (save-restriction
        (narrow-to-region from to)
        (goto-char (point-min))
        ;; char-glyph-conversion
        (while (not (eobp))
          (if (looking-at tml-char-glyph-regexp)
              (progn
                (setq match-str (match-string 0)
                      glyph-str
                      (concat glyph-str
                              (gethash match-str tml-char-glyph-hash)))
                (goto-char (match-end 0)))
            ;; No glyph is known for this character: keep it as it is.
            (setq glyph-str (concat glyph-str (string (following-char))))
            (forward-char 1)))
        ;; GLYPH-STR is still nil only when the region was empty.  The
        ;; old code then called `aset' on nil, which always signals an
        ;; error; there is simply nothing to compose in that case.
        (when glyph-str
          ;; glyph reordering
          (when (and (string-match tml-glyph-reorder-key-glyphs glyph-str)
                     (string-match (car tml-glyph-reordering-regexp-list)
                                   glyph-str))
            (setq glyph-str
                  (replace-match (cdr tml-glyph-reordering-regexp-list)
                                 nil nil glyph-str)))
          ;; glyph-glyph-conversion
          (when (string-match tml-glyph-glyph-regexp glyph-str)
            (setq match-str (match-string 0 glyph-str))
            (setq glyph-str
                  (replace-match (gethash match-str tml-glyph-glyph-hash)
                                 nil nil glyph-str)))
          ;; concatenate and attach reference-points.
          ;; The list built here is (RULE G1 RULE G2 ...); dropping its
          ;; first element gives the (G1 RULE G2 ...) form passed to
          ;; `compose-region'.
          (compose-region
           from to
           (cdr
            (apply
             'nconc
             (mapcar
              (function
               (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
              glyph-str)))))))))
355
;;;###autoload
(defun tamil-composition-function (pos &optional string)
  "Compose Tamil characters after the position POS.
If the text at POS matches `tamil-composable-pattern', compose it and
return the position where the composed sequence ends; otherwise
return nil.  Point is moved to POS.
If STRING is not nil, it is a string, and POS is an index to the string.
Composing inside a string is not yet implemented, so nil is returned
in that case."
  (unless string
    (goto-char pos)
    (when (looking-at tamil-composable-pattern)
      ;; Remember the end of the match before composing.
      (let ((end (match-end 0)))
        (tamil-compose-syllable-region pos end)
        end))))
368
369 (provide 'tml-util)
370
371 ;;; tml-util.el ends here