Typo.
[bpt/emacs.git] / lisp / language / tml-util.el
1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
2
3 ;; Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007, 2008
4 ;; Free Software Foundation, Inc.
5
6 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
7 ;; Keywords: multilingual, Indian, Tamil
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 3, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 ;; Boston, MA 02110-1301, USA.
25
26 ;; Created: Nov. 08. 2002
27
28 ;;; Commentary:
29
30 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
31 ;; composition of Tamil script characters.
32
33 ;;; Code:
34
35 ;; Tamil Composable Pattern
36 ;; C .. Consonants
37 ;; V .. Vowel
38 ;; H .. Pulli
39 ;; M .. Matra
40 ;; V .. Vowel
41 ;; A .. Anuswar
42 ;; D .. Chandrabindu
43 ;; 1. vowel
44 ;; V
45 ;; 2. syllable : only ligature-formed pattern forms composition.
46 ;; (CkHCs|C)(H|M)?
47 ;; 3. sri special
48 ;; (CsHCrVi)
49
50 ;; oririnal
51 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
52
53 (defconst tamil-consonant
54 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
55
56 (defconst tamil-composable-pattern
57 (concat
58 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
59 "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
60 "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
61 "\\(\e$,1<W<m<P<`\e(B\\)")
62 "Regexp matching a composable sequence of Tamil characters.")
63
64 ;;;###autoload
65 (defun tamil-compose-region (from to)
66 (interactive "r")
67 (save-excursion
68 (save-restriction
69 (narrow-to-region from to)
70 (goto-char (point-min))
71 (while (re-search-forward tamil-composable-pattern nil t)
72 (tamil-compose-syllable-region (match-beginning 0)
73 (match-end 0))))))
74 (defun tamil-compose-string (string)
75 (with-temp-buffer
76 (insert (decompose-string string))
77 (tamil-compose-region (point-min) (point-max))
78 (buffer-string)))
79
80 ;;;###autoload
81 (defun tamil-post-read-conversion (len)
82 (save-excursion
83 (save-restriction
84 (let ((buffer-modified-p (buffer-modified-p)))
85 (narrow-to-region (point) (+ (point) len))
86 (tamil-compose-region (point-min) (point-max))
87 (set-buffer-modified-p buffer-modified-p)
88 (- (point-max) (point-min))))))
89
90 (defun tamil-range (from to)
91 "Make the list of the integers of range FROM to TO."
92 (let (result)
93 (while (<= from to) (setq result (cons to result) to (1- to))) result))
94
95 (defun tamil-regexp-of-hashtbl-keys (hashtbl)
96 "Return a regular expression that matches all keys in hashtable HASHTBL."
97 (let ((max-specpdl-size 1000))
98 (regexp-opt
99 (sort
100 (let (dummy)
101 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
102 dummy)
103 (function (lambda (x y) (> (length x) (length y))))))))
104
105
106 ;; Notes on conversion steps.
107
108 ;; 1. chars to glyphs
109 ;; Simple replacement of characters to glyphs is done.
110
111 ;; 2. glyphs reordering.
112 ;; following "\e$,4)j\e(B", "\e$,4)k\e(B", "\e$,4)l\e(B" goes to the front.
113
114 ;; 3. glyphs to glyphs
115 ;; reordered vowels are ligatured to consonants.
116
117 ;; 4. Composition.
118 ;; left modifiers will be attached at the left.
119 ;; others will be attached right.
120
121 (defvar tml-char-glyph
122 '(;; various signs
123 ("\e$,1<"\e(B" . "\e$,4)b\e(B") ;; not good
124 ("\e$,1<#\e(B" . "\e$,4*G\e(B")
125 ;; Independent Vowels
126 ("\e$,1<%\e(B" . "\e$,4*<\e(B")
127 ("\e$,1<&\e(B" . "\e$,4*=\e(B")
128 ("\e$,1<'\e(B" . "\e$,4*>\e(B")
129 ("\e$,1<(\e(B" . "\e$,4*?\e(B")
130 ("\e$,1<)\e(B" . "\e$,4*@\e(B")
131 ("\e$,1<*\e(B" . "\e$,4*A\e(B")
132 ("\e$,1<.\e(B" . "\e$,4*B\e(B")
133 ("\e$,1</\e(B" . "\e$,4*C\e(B")
134 ("\e$,1<0\e(B" . "\e$,4*D\e(B")
135 ("\e$,1<2\e(B" . "\e$,4*E\e(B")
136 ("\e$,1<3\e(B" . "\e$,4*F\e(B")
137 ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
138 ;; Consonants
139 ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
140 ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
141 ("\e$,1<5\e(B" . "\e$,4*H\e(B")
142
143 ("\e$,1<9\e(B" . "\e$,4*I\e(B")
144 ("\e$,1<:\e(B" . "\e$,4*J\e(B")
145 ("\e$,1<<\e(B" . "\e$,4*\\e(B")
146 ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
147 ("\e$,1<>\e(B" . "\e$,4*K\e(B")
148 ("\e$,1<?\e(B" . "\e$,4*L\e(B")
149 ("\e$,1<C\e(B" . "\e$,4*M\e(B")
150 ("\e$,1<D\e(B" . "\e$,4*N\e(B")
151 ("\e$,1<H\e(B" . "\e$,4*O\e(B")
152 ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
153 ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
154 ("\e$,1<J\e(B" . "\e$,4*P\e(B")
155 ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
156 ("\e$,1<O\e(B" . "\e$,4*R\e(B")
157 ("\e$,1<P\e(B" . "\e$,4*S\e(B")
158 ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
159 ("\e$,1<R\e(B" . "\e$,4*T\e(B")
160 ("\e$,1<S\e(B" . "\e$,4*W\e(B")
161 ("\e$,1<T\e(B" . "\e$,4*V\e(B")
162 ("\e$,1<U\e(B" . "\e$,4*U\e(B")
163 ("\e$,1<W\e(B" . "\e$,4*[\e(B")
164 ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
165 ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
166 ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
167 ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
168 ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
169 ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")
170
171 ;; Dependent vowel signs
172 ("\e$,1<^\e(B" . "\e$,4)c\e(B")
173 ("\e$,1<_\e(B" . "\e$,4)d\e(B")
174 ("\e$,1<`\e(B" . "\e$,4)f\e(B")
175 ("\e$,1<a\e(B" . "\e$,4)g\e(B")
176 ("\e$,1<b\e(B" . "\e$,4)h\e(B")
177 ("\e$,1<f\e(B" . "\e$,4)j\e(B")
178 ("\e$,1<g\e(B" . "\e$,4)k\e(B")
179 ("\e$,1<h\e(B" . "\e$,4)l\e(B")
180 ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
181 ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
182 ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")
183
184 ;; Various signs
185 ("\e$,1<m\e(B" . "\e$,4)b\e(B")
186 ("\e$,1<w\e(B" . "nil") ;; not supported?
187 ))
188
189 (defvar tml-char-glyph-hash
190 (let* ((hash (make-hash-table :test 'equal)))
191 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
192 tml-char-glyph)
193 hash))
194
195 (defvar tml-char-glyph-regexp
196 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
197
198 ;; Tamil languages needed to be reordered.
199
200 (defvar tml-consonants-regexp
201 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
202
203 (defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
204
205 (defvar tml-glyph-reordering-regexp-list
206 (cons
207 (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
208
209 ;; Tamil vowel modifiers to be ligatured.
210 (defvar tml-glyph-glyph
211 '(
212 ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
213 ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
214 ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
215 ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
216 ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
217 ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
218 ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
219 ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
220 ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
221 ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
222 ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
223 ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
224 ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
225 ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
226 ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
227 ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
228 ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
229 ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
230 ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
231 ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
232 ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
233 ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
234 ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
235 ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi
236
237 ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
238 ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
239 ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
240 ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
241 ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
242 ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
243 ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
244 ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
245 ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
246 ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
247 ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
248 ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
249 ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
250 ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
251 ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
252 ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
253 ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
254 ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
255 ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
256 ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
257 ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
258 ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~
259
260 ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
261 ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
262 ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
263 ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
264 ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
265 ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
266 ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
267 ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
268 ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
269 ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
270 ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
271 ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
272 ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
273 ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
274 ("\e$,4*V)g\e(B" . "\e$,4)\7f\e(B") ; l_u
275 ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
276 ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
277 ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u
278
279 ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
280 ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
281 ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
282 ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
283 ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
284 ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
285 ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
286 ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
287 ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
288 ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
289 ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
290 ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
291 ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
292 ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
293 ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
294 ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
295 ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
296 ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
297 ))
298
299 (defvar tml-glyph-glyph-hash
300 (let* ((hash (make-hash-table :test 'equal)))
301 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
302 tml-glyph-glyph)
303 hash))
304
305 (defvar tml-glyph-glyph-regexp
306 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
307
308 (defun tamil-compose-syllable-string (string)
309 (with-temp-buffer
310 (insert (decompose-string string))
311 (tamil-compose-syllable-region (point-min) (point-max))
312 (buffer-string)))
313
314 (defun tamil-compose-syllable-region (from to)
315 "Compose tamil syllable in region FROM to TO."
316 (let (glyph-str match-str glyph-reorder-regexps)
317 (save-excursion
318 (save-restriction
319 (narrow-to-region from to)
320 (goto-char (point-min))
321 ;; char-glyph-conversion
322 (while (not (eobp))
323 (if (looking-at tml-char-glyph-regexp)
324 (progn
325 (setq match-str (match-string 0)
326 glyph-str
327 (concat glyph-str
328 (gethash match-str tml-char-glyph-hash)))
329 (goto-char (match-end 0)))
330 (setq glyph-str (concat glyph-str (string (following-char))))
331 (forward-char 1)))
332 (or glyph-str
333 (aset glyph-str 0 (following-char)))
334 ;; glyph reordering
335 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
336 (if (string-match (car tml-glyph-reordering-regexp-list)
337 glyph-str)
338 (setq glyph-str
339 (replace-match (cdr tml-glyph-reordering-regexp-list)
340 nil nil glyph-str))))
341 ;; glyph-glyph-conversion
342 (when (string-match tml-glyph-glyph-regexp glyph-str)
343 (setq match-str (match-string 0 glyph-str))
344 (setq glyph-str
345 (replace-match (gethash match-str tml-glyph-glyph-hash)
346 nil nil glyph-str)))
347 ;; concatenate and attach reference-points.
348 (setq glyph-str
349 (cdr
350 (apply
351 'nconc
352 (mapcar
353 (function
354 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
355 glyph-str))))
356 (compose-region from to glyph-str)))))
357
358 ;;;###autoload
359 (defun tamil-composition-function (pos &optional string)
360 "Compose Tamil characters after the position POS.
361 If STRING is not nil, it is a string, and POS is an index to the string.
362 In this case, compose characters after POS of the string."
363 (if string
364 (if auto-compose-current-font
365 (if (eq (string-match "[\e$,1< \e(B-\e$,1=?\e(B]+" pos) pos)
366 (or (font-shape-text 0 (match-end 0) auto-compose-current-font
367 string)
368 pos)))
369 (goto-char pos)
370 (if auto-compose-current-font
371 (if (looking-at "[\e$,1< \e(B-\e$,1=?\e(B]+")
372 (or (font-shape-text pos (match-end 0) auto-compose-current-font))
373 (if (looking-at tamil-composable-pattern)
374 (prog1 (match-end 0)
375 (tamil-compose-syllable-region pos (match-end 0))))))))
376
377 (provide 'tml-util)
378
379 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
380 ;;; tml-util.el ends here