Merge from emacs--rel--22
[bpt/emacs.git] / lisp / language / tml-util.el
1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
2
3 ;; Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Indian, Tamil
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
24
25 ;; Created: Nov. 08. 2002
26
27 ;;; Commentary:
28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Tamil script characters.
31
32 ;;; Code:
33
34 ;; Tamil Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Pulli
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; 1. vowel
43 ;; V
44 ;; 2. syllable : only ligature-formed pattern forms composition.
45 ;; (CkHCs|C)(H|M)?
46 ;; 3. sri special
47 ;; (CsHCrVi)
48
49 ;; oririnal
50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
51
52 (defconst tamil-consonant
53 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
54
55 (defconst tamil-composable-pattern
56 (concat
57 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
58 "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
60 "\\(\e$,1<W<m<P<`\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
62
63 ;;;###autoload
64 (defun tamil-compose-region (from to)
65 (interactive "r")
66 (save-excursion
67 (save-restriction
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
72 (match-end 0))))))
73 (defun tamil-compose-string (string)
74 (with-temp-buffer
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
77 (buffer-string)))
78
79 ;;;###autoload
80 (defun tamil-post-read-conversion (len)
81 (save-excursion
82 (save-restriction
83 (let ((buffer-modified-p (buffer-modified-p)))
84 (narrow-to-region (point) (+ (point) len))
85 (tamil-compose-region (point-min) (point-max))
86 (set-buffer-modified-p buffer-modified-p)
87 (- (point-max) (point-min))))))
88
89 (defun tamil-range (from to)
90 "Make the list of the integers of range FROM to TO."
91 (let (result)
92 (while (<= from to) (setq result (cons to result) to (1- to))) result))
93
94 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
95 "Return a regular expression that matches all keys in hashtable HASHTBL."
96 (let ((max-specpdl-size 1000))
97 (regexp-opt
98 (sort
99 (let (dummy)
100 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
101 dummy)
102 (function (lambda (x y) (> (length x) (length y))))))))
103
104
105 ;;;###autoload
106 (defun tamil-composition-function (from to pattern &optional string)
107 "Compose Tamil characters in REGION, or STRING if specified.
108 Assume that the REGION or STRING must fully match the composable
109 PATTERN regexp."
110 (if string (tamil-compose-syllable-string string)
111 (tamil-compose-syllable-region from to))
112 (- to from))
113
114 ;; Register a function to compose Tamil characters.
115 (mapc
116 (function (lambda (ucs)
117 (aset composition-function-table (decode-char 'ucs ucs)
118 (list (cons tamil-composable-pattern
119 'tamil-composition-function)))))
120 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
121
122 ;; Notes on conversion steps.
123
124 ;; 1. chars to glyphs
125 ;; Simple replacement of characters to glyphs is done.
126
127 ;; 2. glyphs reordering.
128 ;; following "\e$,4)j\e(B", "\e$,4)k\e(B", "\e$,4)l\e(B" goes to the front.
129
130 ;; 3. glyphs to glyphs
131 ;; reordered vowels are ligatured to consonants.
132
133 ;; 4. Composition.
134 ;; left modifiers will be attached at the left.
135 ;; others will be attached right.
136
137 (defvar tml-char-glyph
138 '(;; various signs
139 ;;("\e$,1<"\e(B" . "")
140 ("\e$,1<#\e(B" . "\e$,4*G\e(B")
141 ;; Independent Vowels
142 ("\e$,1<%\e(B" . "\e$,4*<\e(B")
143 ("\e$,1<&\e(B" . "\e$,4*=\e(B")
144 ("\e$,1<'\e(B" . "\e$,4*>\e(B")
145 ("\e$,1<(\e(B" . "\e$,4*?\e(B")
146 ("\e$,1<)\e(B" . "\e$,4*@\e(B")
147 ("\e$,1<*\e(B" . "\e$,4*A\e(B")
148 ("\e$,1<.\e(B" . "\e$,4*B\e(B")
149 ("\e$,1</\e(B" . "\e$,4*C\e(B")
150 ("\e$,1<0\e(B" . "\e$,4*D\e(B")
151 ("\e$,1<2\e(B" . "\e$,4*E\e(B")
152 ("\e$,1<3\e(B" . "\e$,4*F\e(B")
153 ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
154 ;; Consonants
155 ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
156 ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
157 ("\e$,1<5\e(B" . "\e$,4*H\e(B")
158
159 ("\e$,1<9\e(B" . "\e$,4*I\e(B")
160 ("\e$,1<:\e(B" . "\e$,4*J\e(B")
161 ("\e$,1<<\e(B" . "\e$,4*\\e(B")
162 ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
163 ("\e$,1<>\e(B" . "\e$,4*K\e(B")
164 ("\e$,1<?\e(B" . "\e$,4*L\e(B")
165 ("\e$,1<C\e(B" . "\e$,4*M\e(B")
166 ("\e$,1<D\e(B" . "\e$,4*N\e(B")
167 ("\e$,1<H\e(B" . "\e$,4*O\e(B")
168 ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
169 ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
170 ("\e$,1<J\e(B" . "\e$,4*P\e(B")
171 ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
172 ("\e$,1<O\e(B" . "\e$,4*R\e(B")
173 ("\e$,1<P\e(B" . "\e$,4*S\e(B")
174 ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
175 ("\e$,1<R\e(B" . "\e$,4*T\e(B")
176 ("\e$,1<S\e(B" . "\e$,4*W\e(B")
177 ("\e$,1<T\e(B" . "\e$,4*V\e(B")
178 ("\e$,1<U\e(B" . "\e$,4*U\e(B")
179 ("\e$,1<W\e(B" . "\e$,4*[\e(B")
180 ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
181 ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
182 ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
183 ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
184 ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
185 ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")
186
187 ;; Dependent vowel signs
188 ("\e$,1<^\e(B" . "\e$,4)c\e(B")
189 ("\e$,1<_\e(B" . "\e$,4)d\e(B")
190 ("\e$,1<`\e(B" . "\e$,4)f\e(B")
191 ("\e$,1<a\e(B" . "\e$,4)g\e(B")
192 ("\e$,1<b\e(B" . "\e$,4)h\e(B")
193 ("\e$,1<f\e(B" . "\e$,4)j\e(B")
194 ("\e$,1<g\e(B" . "\e$,4)k\e(B")
195 ("\e$,1<h\e(B" . "\e$,4)l\e(B")
196 ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
197 ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
198 ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")
199
200 ;; Various signs
201 ("\e$,1<m\e(B" . "\e$,4)b\e(B")
202 ("\e$,1<w\e(B" . "nil") ;; not supported?
203 ))
204
205 (defvar tml-char-glyph-hash
206 (let* ((hash (make-hash-table :test 'equal)))
207 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
208 tml-char-glyph)
209 hash))
210
211 (defvar tml-char-glyph-regexp
212 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
213
214 ;; Tamil languages needed to be reordered.
215
216 (defvar tml-consonants-regexp
217 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
218
219 (defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
220
221 (defvar tml-glyph-reordering-regexp-list
222 (cons
223 (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
224
225 ;; Tamil vowel modifiers to be ligatured.
226 (defvar tml-glyph-glyph
227 '(
228 ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
229 ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
230 ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
231 ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
232 ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
233 ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
234 ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
235 ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
236 ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
237 ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
238 ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
239 ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
240 ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
241 ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
242 ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
243 ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
244 ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
245 ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
246 ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
247 ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
248 ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
249 ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
250 ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
251 ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi
252
253 ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
254 ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
255 ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
256 ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
257 ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
258 ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
259 ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
260 ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
261 ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
262 ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
263 ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
264 ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
265 ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
266 ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
267 ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
268 ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
269 ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
270 ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
271 ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
272 ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
273 ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
274 ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~
275
276 ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
277 ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
278 ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
279 ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
280 ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
281 ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
282 ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
283 ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
284 ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
285 ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
286 ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
287 ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
288 ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
289 ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
290 ("\e$,4*V)g\e(B" . "\e$,4)\7f\e(B") ; l_u
291 ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
292 ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
293 ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u
294
295 ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
296 ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
297 ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
298 ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
299 ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
300 ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
301 ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
302 ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
303 ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
304 ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
305 ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
306 ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
307 ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
308 ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
309 ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
310 ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
311 ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
312 ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
313 ))
314
315 (defvar tml-glyph-glyph-hash
316 (let* ((hash (make-hash-table :test 'equal)))
317 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
318 tml-glyph-glyph)
319 hash))
320
321 (defvar tml-glyph-glyph-regexp
322 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
323
324 (defun tamil-compose-syllable-string (string)
325 (with-temp-buffer
326 (insert (decompose-string string))
327 (tamil-compose-syllable-region (point-min) (point-max))
328 (buffer-string)))
329
330 (defun tamil-compose-syllable-region (from to)
331 "Compose tamil syllable in region FROM to TO."
332 (let (glyph-str match-str glyph-reorder-regexps)
333 (save-excursion
334 (save-restriction
335 (narrow-to-region from to)
336 (goto-char (point-min))
337 ;; char-glyph-conversion
338 (while (re-search-forward tml-char-glyph-regexp nil t)
339 (setq match-str (match-string 0))
340 (setq glyph-str
341 (concat glyph-str (gethash match-str tml-char-glyph-hash))))
342 ;; glyph reordering
343 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
344 (if (string-match (car tml-glyph-reordering-regexp-list)
345 glyph-str)
346 (setq glyph-str
347 (replace-match (cdr tml-glyph-reordering-regexp-list)
348 nil nil glyph-str))))
349 ;; glyph-glyph-conversion
350 (when (string-match tml-glyph-glyph-regexp glyph-str)
351 (setq match-str (match-string 0 glyph-str))
352 (setq glyph-str
353 (replace-match (gethash match-str tml-glyph-glyph-hash)
354 nil nil glyph-str)))
355 ;; concatenate and attach reference-points.
356 (setq glyph-str
357 (cdr
358 (apply
359 'nconc
360 (mapcar
361 (function
362 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
363 glyph-str))))
364 (compose-region from to glyph-str)))))
365
366 (provide 'tml-util)
367
368 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
369 ;;; tml-util.el ends here