Add arch taglines
[bpt/emacs.git] / lisp / language / tml-util.el
CommitLineData
585eb076
KH
1;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*-
2
3;; Copyright (C) 2001 Free Software Foundation, Inc.
4
5;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6;; Keywords: multilingual, Indian, Tamil
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
24
25;; Created: Nov. 08. 2002
26
27;;; Commentary:
28
29;; This file provides character(Unicode) to glyph(CDAC) conversion and
30;; composition of Tamil script characters.
31
32;;; Code:
33
34;; Tamil Composable Pattern
35;; C .. Consonants
36;; V .. Vowel
37;; H .. Pulli
38;; M .. Matra
39;; V .. Vowel
40;; A .. Anuswar
41;; D .. Chandrabindu
42;; 1. vowel
43;; V
44;; 2. syllable : only ligature-formed pattern forms composition.
45;; (CkHCs|C)(H|M)?
46;; 3. sri special
47;; (CsHCrVi)
48
49;; oririnal
50;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
51
52(defconst tamil-consonant
53 "[\e$,1<5\e(B-\e$,1<Y\e(B]")
54
55(defconst tamil-composable-pattern
56 (concat
57 "\\([\e$,1<%\e(B-\e$,1<4\e(B]\\)\\|"
58 "[\e$,1<"<#\e(B]\\|" ;; vowel modifier considered independent
59 "\\(\\(?:\\(?:\e$,1<5<m<W\e(B\\)\\|[\e$,1<5\e(B-\e$,1<Y\e(B]\\)[\e$,1<m<^\e(B-\e$,1<l\e(B]?\\)\\|"
60 "\\(\e$,1<W<m<P<`\e(B\\)")
61 "Regexp matching a composable sequence of Tamil characters.")
62
63;;;###autoload
64(defun tamil-compose-region (from to)
65 (interactive "r")
66 (save-excursion
67 (save-restriction
68 (narrow-to-region from to)
69 (goto-char (point-min))
70 (while (re-search-forward tamil-composable-pattern nil t)
71 (tamil-compose-syllable-region (match-beginning 0)
72 (match-end 0))))))
73(defun tamil-compose-string (string)
74 (with-temp-buffer
75 (insert (decompose-string string))
76 (tamil-compose-region (point-min) (point-max))
77 (buffer-string)))
78
79(defun tamil-post-read-conversion (len)
80 (save-excursion
81 (save-restriction
82 (let ((buffer-modified-p (buffer-modified-p)))
83 (narrow-to-region (point) (+ (point) len))
84 (tamil-compose-region (point-min) (point-max))
85 (set-buffer-modified-p buffer-modified-p)
86 (- (point-max) (point-min))))))
87
88(defun tamil-range (from to)
89 "Make the list of the integers of range FROM to TO."
90 (let (result)
91 (while (<= from to) (setq result (cons to result) to (1- to))) result))
92
93(defun tamil-regexp-of-hashtbl-keys (hashtbl)
94 "Return a regular expression that matches all keys in hashtable HASHTBL."
95 (let ((max-specpdl-size 1000))
96 (regexp-opt
97 (sort
98 (let (dummy)
99 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
100 dummy)
101 (function (lambda (x y) (> (length x) (length y))))))))
102
103
104;;;###autoload
105(defun tamil-composition-function (from to pattern &optional string)
106 "Compose Tamil characters in REGION, or STRING if specified.
107Assume that the REGION or STRING must fully match the composable
108PATTERN regexp."
109 (if string (tamil-compose-syllable-string string)
110 (tamil-compose-syllable-region from to))
111 (- to from))
112
113;; Register a function to compose Tamil characters.
114(mapc
115 (function (lambda (ucs)
116 (aset composition-function-table (decode-char 'ucs ucs)
117 (list (cons tamil-composable-pattern
118 'tamil-composition-function)))))
119 (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
120
121;; Notes on conversion steps.
122
123;; 1. chars to glyphs
124;; Simple replacement of characters to glyphs is done.
125
126;; 2. glyphs reordering.
127;; following "\e$,4)j\e(B", "\e$,4)k\e(B", "\e$,4)l\e(B" goes to the front.
128
129;; 3. glyphs to glyphs
130;; reordered vowels are ligatured to consonants.
131
132;; 4. Composition.
133;; left modifiers will be attached at the left.
134;; others will be attached right.
135
136(defvar tml-char-glyph
137 '(;; various signs
138 ;;("\e$,1<"\e(B" . "")
139 ("\e$,1<#\e(B" . "\e$,4*G\e(B")
140 ;; Independent Vowels
141 ("\e$,1<%\e(B" . "\e$,4*<\e(B")
142 ("\e$,1<&\e(B" . "\e$,4*=\e(B")
143 ("\e$,1<'\e(B" . "\e$,4*>\e(B")
144 ("\e$,1<(\e(B" . "\e$,4*?\e(B")
145 ("\e$,1<)\e(B" . "\e$,4*@\e(B")
146 ("\e$,1<*\e(B" . "\e$,4*A\e(B")
147 ("\e$,1<.\e(B" . "\e$,4*B\e(B")
148 ("\e$,1</\e(B" . "\e$,4*C\e(B")
149 ("\e$,1<0\e(B" . "\e$,4*D\e(B")
150 ("\e$,1<2\e(B" . "\e$,4*E\e(B")
151 ("\e$,1<3\e(B" . "\e$,4*F\e(B")
152 ("\e$,1<4\e(B" . "\e$,4*E*W\e(B")
153 ;; Consonants
154 ("\e$,1<5<m<W<m\e(B" . "\e$,4):\e(B") ; ks.
155 ("\e$,1<5<m<W\e(B" . "\e$,4*^\e(B") ; ks
156 ("\e$,1<5\e(B" . "\e$,4*H\e(B")
157
158 ("\e$,1<9\e(B" . "\e$,4*I\e(B")
159 ("\e$,1<:\e(B" . "\e$,4*J\e(B")
160 ("\e$,1<<\e(B" . "\e$,4*\\e(B")
161 ("\e$,1<<<m\e(B" . "\e$,4)8\e(B")
162 ("\e$,1<>\e(B" . "\e$,4*K\e(B")
163 ("\e$,1<?\e(B" . "\e$,4*L\e(B")
164 ("\e$,1<C\e(B" . "\e$,4*M\e(B")
165 ("\e$,1<D\e(B" . "\e$,4*N\e(B")
166 ("\e$,1<H\e(B" . "\e$,4*O\e(B")
167 ("\e$,1<I\e(B" . "\e$,4*Y\e(B")
168 ("\e$,1<I<m\e(B" . "\e$,4)a\e(B")
169 ("\e$,1<J\e(B" . "\e$,4*P\e(B")
170 ("\e$,1<N\e(B" . "\e$,4*Q\e(B")
171 ("\e$,1<O\e(B" . "\e$,4*R\e(B")
172 ("\e$,1<P\e(B" . "\e$,4*S\e(B")
173 ("\e$,1<Q\e(B" . "\e$,4*X\e(B")
174 ("\e$,1<R\e(B" . "\e$,4*T\e(B")
175 ("\e$,1<S\e(B" . "\e$,4*W\e(B")
176 ("\e$,1<T\e(B" . "\e$,4*V\e(B")
177 ("\e$,1<U\e(B" . "\e$,4*U\e(B")
178 ("\e$,1<W\e(B" . "\e$,4*[\e(B")
179 ("\e$,1<W<m\e(B" . "\e$,4)7\e(B")
180 ("\e$,1<W<m<P<`\e(B" . "\e$,4*_\e(B")
181 ("\e$,1<X\e(B" . "\e$,4*Z\e(B")
182 ("\e$,1<X<m\e(B" . "\e$,4)6\e(B")
183 ("\e$,1<Y\e(B" . "\e$,4*]\e(B")
184 ("\e$,1<Y<m\e(B" . "\e$,4)9\e(B")
185
186 ;; Dependent vowel signs
187 ("\e$,1<^\e(B" . "\e$,4)c\e(B")
188 ("\e$,1<_\e(B" . "\e$,4)d\e(B")
189 ("\e$,1<`\e(B" . "\e$,4)f\e(B")
190 ("\e$,1<a\e(B" . "\e$,4)g\e(B")
191 ("\e$,1<b\e(B" . "\e$,4)h\e(B")
192 ("\e$,1<f\e(B" . "\e$,4)j\e(B")
193 ("\e$,1<g\e(B" . "\e$,4)k\e(B")
194 ("\e$,1<h\e(B" . "\e$,4)l\e(B")
195 ("\e$,1<j\e(B" . "\e$,4)j)c\e(B")
196 ("\e$,1<k\e(B" . "\e$,4)k)c\e(B")
197 ("\e$,1<l\e(B" . "\e$,4)j*W\e(B")
198
199 ;; Various signs
200 ("\e$,1<m\e(B" . "\e$,4)b\e(B")
201 ("\e$,1<w\e(B" . "nil") ;; not supported?
202 ))
203
204(defvar tml-char-glyph-hash
205 (let* ((hash (make-hash-table :test 'equal)))
206 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
207 tml-char-glyph)
208 hash))
209
210(defvar tml-char-glyph-regexp
211 (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash))
212
213;; Tamil languages needed to be reordered.
214
215(defvar tml-consonants-regexp
216 "[\e$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*]\e(B]")
217
218(defvar tml-glyph-reorder-key-glyphs "[\e$,4)j)k)l\e(B]")
219
220(defvar tml-glyph-reordering-regexp-list
221 (cons
222 (concat "\\(" tml-consonants-regexp "\\)\\([\e$,4)j)k)l\e(B]\\)") "\\2\\1"))
223
224;; Tamil vowel modifiers to be ligatured.
225(defvar tml-glyph-glyph
226 '(
227 ("\e$,4*H)d\e(B" . "\e$,4(a\e(B") ; ki
228 ("\e$,4*^)d\e(B" . "\e$,4(v\e(B") ; ksi
229 ("\e$,4*^)f\e(B" . "\e$,4)2\e(B") ; ksi~
230 ("\e$,4*I)d\e(B" . "\e$,4(b\e(B") ; n^i
231 ("\e$,4*J)d\e(B" . "\e$,4(c\e(B") ; ci
232 ("\e$,4*K)d\e(B" . "\e$,4(d\e(B") ; n~i
233 ("\e$,4*L)d\e(B" . "\e$,4)n\e(B") ; t.i
234 ("\e$,4*M)d\e(B" . "\e$,4(e\e(B") ; n.i
235 ("\e$,4*N)d\e(B" . "\e$,4(f\e(B") ; ti
236 ("\e$,4*O)d\e(B" . "\e$,4(g\e(B") ; ni
237 ("\e$,4*P)d\e(B" . "\e$,4(h\e(B") ; pi
238 ("\e$,4*Q)d\e(B" . "\e$,4(i\e(B") ; mi
239 ("\e$,4*R)d\e(B" . "\e$,4(j\e(B") ; yi
240 ("\e$,4*S)d\e(B" . "\e$,4(k\e(B") ; ri
241 ("\e$,4*T)d\e(B" . "\e$,4(l\e(B") ; li
242 ("\e$,4*U)d\e(B" . "\e$,4(m\e(B") ; vi
243 ("\e$,4*V)d\e(B" . "\e$,4(n\e(B") ; l_i
244 ("\e$,4*W)d\e(B" . "\e$,4(o\e(B") ; l.i
245 ("\e$,4*X)d\e(B" . "\e$,4(p\e(B") ; r_i
246 ("\e$,4*Y)d\e(B" . "\e$,4(q\e(B") ; n_i
247 ("\e$,4*Z)d\e(B" . "\e$,4(r\e(B") ; si
248 ("\e$,4*[)d\e(B" . "\e$,4(s\e(B") ; s'i
249 ("\e$,4*\)d\e(B" . "\e$,4(t\e(B") ; ji
250 ("\e$,4*])d\e(B" . "\e$,4(u\e(B") ; hi
251
252 ("\e$,4*H)f\e(B" . "\e$,4(w\e(B") ; ki~
253 ("\e$,4*I)f\e(B" . "\e$,4(x\e(B") ; n^i~
254 ("\e$,4*J)f\e(B" . "\e$,4(y\e(B") ; ci~
255 ("\e$,4*K)f\e(B" . "\e$,4(z\e(B") ; n~i~
256 ("\e$,4*L)f\e(B" . "\e$,4)o\e(B") ; t.i~
257 ("\e$,4*M)f\e(B" . "\e$,4)!\e(B") ; n.i~
258 ("\e$,4*N)f\e(B" . "\e$,4)"\e(B") ; ti~
259 ("\e$,4*O)f\e(B" . "\e$,4)#\e(B") ; ni~
260 ("\e$,4*P)f\e(B" . "\e$,4)$\e(B") ; pi~
261 ("\e$,4*Q)f\e(B" . "\e$,4)%\e(B") ; mi~
262 ("\e$,4*R)f\e(B" . "\e$,4)&\e(B") ; yi~
263 ("\e$,4*S)f\e(B" . "\e$,4)'\e(B") ; ri~
264 ("\e$,4*T)f\e(B" . "\e$,4)(\e(B") ; li~
265 ("\e$,4*U)f\e(B" . "\e$,4))\e(B") ; vi~
266 ("\e$,4*V)f\e(B" . "\e$,4)*\e(B") ; l_i~
267 ("\e$,4*W)f\e(B" . "\e$,4)+\e(B") ; l.i~
268 ("\e$,4*X)f\e(B" . "\e$,4),\e(B") ; r_i~
269 ("\e$,4*Y)f\e(B" . "\e$,4)-\e(B") ; n_i~
270 ("\e$,4*Z)f\e(B" . "\e$,4).\e(B") ; si~
271 ("\e$,4*[)f\e(B" . "\e$,4)/\e(B") ; s'i~
272 ("\e$,4*\)f\e(B" . "\e$,4)0\e(B") ; ji~
273 ("\e$,4*])f\e(B" . "\e$,4)1\e(B") ; hi~
274
275 ("\e$,4*H)g\e(B" . "\e$,4)p\e(B") ; ku
276 ("\e$,4*I)g\e(B" . "\e$,4)q\e(B") ; n^u
277 ("\e$,4*J)g\e(B" . "\e$,4)r\e(B") ; cu
278 ("\e$,4*K)g\e(B" . "\e$,4)s\e(B") ; n~u
279 ("\e$,4*L)g\e(B" . "\e$,4)t\e(B") ; t.u
280 ("\e$,4*M)g\e(B" . "\e$,4)u\e(B") ; n.u
281 ("\e$,4*N)g\e(B" . "\e$,4)v\e(B") ; tu
282 ("\e$,4*O)g\e(B" . "\e$,4)x\e(B") ; nu
283 ("\e$,4*P)g\e(B" . "\e$,4)y\e(B") ; pu
284 ("\e$,4*Q)g\e(B" . "\e$,4)z\e(B") ; mu
285 ("\e$,4*R)g\e(B" . "\e$,4){\e(B") ; yu
286 ("\e$,4*S)g\e(B" . "\e$,4)|\e(B") ; ru
287 ("\e$,4*T)g\e(B" . "\e$,4)}\e(B") ; lu
288 ("\e$,4*U)g\e(B" . "\e$,4)~\e(B") ; vu
289 ("\e$,4*V)g\e(B" . "\e$,4)\7f\e(B") ; l_u
290 ("\e$,4*W)g\e(B" . "\e$,4* \e(B") ; l.u
291 ("\e$,4*X)g\e(B" . "\e$,4*!\e(B") ; r_u
292 ("\e$,4*Y)g\e(B" . "\e$,4*"\e(B") ; n_u
293
294 ("\e$,4*H)h\e(B" . "\e$,4*#\e(B") ; ku~
295 ("\e$,4*I)h\e(B" . "\e$,4*$\e(B") ; n^u~
296 ("\e$,4*J)h\e(B" . "\e$,4*%\e(B") ; cu~
297 ("\e$,4*K)h\e(B" . "\e$,4*&\e(B") ; n~u~
298 ("\e$,4*L)h\e(B" . "\e$,4*'\e(B") ; t.u~
299 ("\e$,4*M)h\e(B" . "\e$,4*(\e(B") ; n.u~
300 ("\e$,4*N)h\e(B" . "\e$,4*)\e(B") ; tu~
301 ("\e$,4*O)h\e(B" . "\e$,4*+\e(B") ; nu~
302 ("\e$,4*P)h\e(B" . "\e$,4*,\e(B") ; pu~
303 ("\e$,4*Q)h\e(B" . "\e$,4*-\e(B") ; mu~
304 ("\e$,4*R)h\e(B" . "\e$,4*.\e(B") ; yu~
305 ("\e$,4*S)h\e(B" . "\e$,4*/\e(B") ; ru~
306 ("\e$,4*T)h\e(B" . "\e$,4*6\e(B") ; lu~
307 ("\e$,4*U)h\e(B" . "\e$,4*7\e(B") ; vu~
308 ("\e$,4*V)h\e(B" . "\e$,4*8\e(B") ; l_u~
309 ("\e$,4*W)h\e(B" . "\e$,4*9\e(B") ; l.u~
310 ("\e$,4*X)h\e(B" . "\e$,4*:\e(B") ; r_u~
311 ("\e$,4*Y)h\e(B" . "\e$,4*;\e(B") ; n_u~
312 ))
313
314(defvar tml-glyph-glyph-hash
315 (let* ((hash (make-hash-table :test 'equal)))
316 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
317 tml-glyph-glyph)
318 hash))
319
320(defvar tml-glyph-glyph-regexp
321 (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash))
322
323(defun tamil-compose-syllable-string (string)
324 (with-temp-buffer
325 (insert (decompose-string string))
326 (tamil-compose-syllable-region (point-min) (point-max))
327 (buffer-string)))
328
329(defun tamil-compose-syllable-region (from to)
330 "Compose tamil syllable in region FROM to TO."
331 (let (glyph-str match-str glyph-reorder-regexps)
332 (save-excursion
333 (save-restriction
334 (narrow-to-region from to)
335 (goto-char (point-min))
336 ;; char-glyph-conversion
337 (while (re-search-forward tml-char-glyph-regexp nil t)
338 (setq match-str (match-string 0))
339 (setq glyph-str
340 (concat glyph-str (gethash match-str tml-char-glyph-hash))))
341 ;; glyph reordering
342 (when (string-match tml-glyph-reorder-key-glyphs glyph-str)
343 (if (string-match (car tml-glyph-reordering-regexp-list)
344 glyph-str)
345 (setq glyph-str
346 (replace-match (cdr tml-glyph-reordering-regexp-list)
347 nil nil glyph-str))))
348 ;; glyph-glyph-conversion
349 (when (string-match tml-glyph-glyph-regexp glyph-str)
350 (setq match-str (match-string 0 glyph-str))
351 (setq glyph-str
352 (replace-match (gethash match-str tml-glyph-glyph-hash)
353 nil nil glyph-str)))
354 ;; concatenate and attach reference-points.
355 (setq glyph-str
356 (cdr
357 (apply
358 'nconc
359 (mapcar
360 (function
361 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
362 glyph-str))))
363 (compose-region from to glyph-str)))))
364
365(provide 'tml-util)
366
ab5796a9 367;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e
585eb076 368;;; tml-util.el ends here