Add arch taglines
[bpt/emacs.git] / lisp / language / mlm-util.el
1 ;;; mlm-util.el --- support for composing malayalam characters -*-coding: iso-2022-7bit;-*-
2
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Malayalam
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;; Created: Feb. 11. 2003
26
27 ;;; Commentary:
28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Malayalam script characters.
31
32 ;;; Code:
33
34 ;; Malayalam Composable Pattern
35 ;; C .. Consonants
36 ;; V .. Vowel
37 ;; H .. Halant
38 ;; M .. Matra
39 ;; V .. Vowel
40 ;; A .. Anuswar
41 ;; D .. Chandrabindu
42 ;; (N .. Zerowidth Non Joiner)
43 ;; (J .. Zerowidth Joiner. )
44 ;; 1. vowel
45 ;; V(A|visargam)?
46 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
48
49 (defconst malayalam-consonant
50 "[\e$,1@5\e(B-\e$,1@Y\e(B]")
51
52 (defconst malayalam-composable-pattern
53 (concat
54 "\\([\e$,1@%\e(B-\e$,1@4\e(B][\e$,1@"\e(B]?\\)\\|\e$,1@#\e(B"
55 "\\|\\("
56 "\\(?:\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?\\(?:[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?[\e$,1@5\e(B-\e$,1@Y\e(B]\e$,1@m\e(B\\)?"
57 "[\e$,1@5\e(B-\e$,1@Y\e(B]\\(?:\e$,1@m\e(B\\|[\e$,1@^\e(B-\e$,1@c@f@g@h@j@j@k@l\e(B]?[\e$,1@"@m\e(B]?\\)?"
58 "\\)")
59 "Regexp matching a composable sequence of Malayalam characters.")
60
61 ;;;###autoload
62 (defun malayalam-compose-region (from to)
63 (interactive "r")
64 (save-excursion
65 (save-restriction
66 (narrow-to-region from to)
67 (goto-char (point-min))
68 (while (re-search-forward malayalam-composable-pattern nil t)
69 (malayalam-compose-syllable-region (match-beginning 0)
70 (match-end 0))))))
71 (defun malayalam-compose-string (string)
72 (with-temp-buffer
73 (insert (decompose-string string))
74 (malayalam-compose-region (point-min) (point-max))
75 (buffer-string)))
76
77 (defun malayalam-post-read-conversion (len)
78 (save-excursion
79 (save-restriction
80 (let ((buffer-modified-p (buffer-modified-p)))
81 (narrow-to-region (point) (+ (point) len))
82 (malayalam-compose-region (point-min) (point-max))
83 (set-buffer-modified-p buffer-modified-p)
84 (- (point-max) (point-min))))))
85
86 (defun malayalam-range (from to)
87 "Make the list of the integers of range FROM to TO."
88 (let (result)
89 (while (<= from to) (setq result (cons to result) to (1- to))) result))
90
91 (defun malayalam-regexp-of-hashtbl-keys (hashtbl)
92 "Return a regular expression that matches all keys in hashtable HASHTBL."
93 (let ((max-specpdl-size 1000))
94 (regexp-opt
95 (sort
96 (let (dummy)
97 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
98 dummy)
99 (function (lambda (x y) (> (length x) (length y))))))))
100
101
102 ;;;###autoload
103 (defun malayalam-composition-function (from to pattern &optional string)
104 "Compose Malayalam characters in REGION, or STRING if specified.
105 Assume that the REGION or STRING must fully match the composable
106 PATTERN regexp."
107 (if string (malayalam-compose-syllable-string string)
108 (malayalam-compose-syllable-region from to))
109 (- to from))
110
111 ;; Register a function to compose Malayalam characters.
112 (mapc
113 (function (lambda (ucs)
114 (aset composition-function-table (decode-char 'ucs ucs)
115 (list (cons malayalam-composable-pattern
116 'malayalam-composition-function)))))
117 (nconc '(#x0d02 #x0d03) (malayalam-range #x0d05 #x0d39)))
118
119 ;; Notes on conversion steps.
120
121 ;; 1. chars to glyphs
122 ;;
123 ;; Simple replacement of characters to glyphs is done.
124
125 ;; 2. glyphs reordering.
126 ;;
127 ;; Two special reordering rule takes place.
128 ;; a. following "\e$,46[\e(B" goes to the front.
129 ;; b. following "\e$,46S6S\e(B", "\e$,46S\e(B" or "\e$,46T\e(B" goes to the front.
130 ;; This reordering occurs only to the last cluster of consonants.
131 ;; Preceding consonants with halant characters are not affected.
132
133 ;; 3. Composition.
134 ;;
135 ;; left modifiers will be attached at the left.
136 ;; others will be attached right.
137
138 (defvar mlm-char-glyph
139 '(;; various signs
140 ("\e$,1@"\e(B" . "\e$,46W\e(B")
141 ("\e$,1@#\e(B" . "\e$,46X\e(B")
142 ;; Independent Vowels
143 ("\e$,1@%\e(B" . "\e$,46!\e(B")
144 ("\e$,1@&\e(B" . "\e$,46"\e(B")
145 ("\e$,1@'\e(B" . "\e$,46#\e(B")
146 ("\e$,1@(\e(B" . "\e$,46#6U\e(B")
147 ("\e$,1@)\e(B" . "\e$,46$\e(B")
148 ("\e$,1@*\e(B" . "\e$,46$6U\e(B")
149 ("\e$,1@+\e(B" . "\e$,46%\e(B")
150 ("\e$,1@,\e(B" . "nil") ;; not in present use, not supported.
151 ("\e$,1@.\e(B" . "\e$,46&\e(B")
152 ("\e$,1@/\e(B" . "\e$,46'\e(B")
153 ("\e$,1@0\e(B" . "\e$,46S6&\e(B")
154 ("\e$,1@2\e(B" . "\e$,46(\e(B")
155 ("\e$,1@3\e(B" . "\e$,46(6M\e(B")
156 ("\e$,1@4\e(B" . "\e$,46(6U\e(B")
157 ;; Consonants
158 ("\e$,1@5\e(B" . "\e$,46)\e(B")
159 ("\e$,1@5@m@5\e(B" . "\e$,47!\e(B")
160 ("\e$,1@5@m@S\e(B" . "\e$,47"\e(B")
161 ("\e$,1@5@m@W\e(B" . "\e$,47#\e(B")
162 ("\e$,1@5@m@?\e(B" . "\e$,47N\e(B")
163 ("\e$,1@5@m@D\e(B" . "\e$,47`\e(B")
164 ("\e$,1@5@a\e(B" . "\e$,47f\e(B")
165 ("\e$,1@5@m@5@a\e(B" . "\e$,47g\e(B")
166 ("\e$,1@5@a\e(B" . "\e$,47f\e(B")
167 ("\e$,1@5@m@5@a\e(B" . "\e$,47g\e(B")
168
169 ("\e$,1@6\e(B" . "\e$,46*\e(B")
170
171 ("\e$,1@7\e(B" . "\e$,46+\e(B")
172 ("\e$,1@7@m@7\e(B" . "\e$,47$\e(B")
173 ("\e$,1@7@m@R\e(B" . "\e$,47%\e(B")
174 ("\e$,1@7@m@N\e(B" . "\e$,47\\e(B")
175 ("\e$,1@7@m@H\e(B" . "\e$,47a\e(B")
176
177 ("\e$,1@8\e(B" . "\e$,46,\e(B")
178
179 ("\e$,1@9\e(B" . "\e$,46-\e(B")
180 ("\e$,1@9@m@5\e(B" . "\e$,47&\e(B")
181 ("\e$,1@9@m@9\e(B" . "\e$,47'\e(B")
182 ("\e$,1@9@m@5@a\e(B" . "\e$,47h\e(B")
183
184 ("\e$,1@:\e(B" . "\e$,46.\e(B")
185 ("\e$,1@:@m@:\e(B" . "\e$,47(\e(B") ;; duplicate
186 ("\e$,1@:@m@;\e(B" . "\e$,47Q\e(B")
187
188 ("\e$,1@;\e(B" . "\e$,46/\e(B")
189
190 ("\e$,1@<\e(B" . "\e$,460\e(B")
191 ("\e$,1@<@m@<\e(B" . "\e$,47V\e(B")
192 ("\e$,1@<@m@>\e(B" . "\e$,47Z\e(B")
193
194 ("\e$,1@=\e(B" . "\e$,461\e(B")
195
196 ("\e$,1@>\e(B" . "\e$,462\e(B")
197 ("\e$,1@>@m@:\e(B" . "\e$,47)\e(B")
198 ("\e$,1@>@m@>\e(B" . "\e$,47*\e(B")
199
200 ("\e$,1@?\e(B" . "\e$,463\e(B")
201 ("\e$,1@?@m@?\e(B" . "\e$,47+\e(B")
202
203 ("\e$,1@@\e(B" . "\e$,464\e(B")
204 ("\e$,1@A\e(B" . "\e$,465\e(B")
205 ("\e$,1@A@m@A\e(B" . "\e$,47M\e(B")
206 ("\e$,1@B\e(B" . "\e$,466\e(B")
207
208 ("\e$,1@C\e(B" . "\e$,467\e(B")
209 ("\e$,1@C@a@m\e(B" . "\e$,47,\e(B") ;; half consonant
210 ("\e$,1@C@m@?\e(B" . "\e$,47-\e(B")
211 ("\e$,1@C@m@C\e(B" . "\e$,47.\e(B")
212 ("\e$,1@C@m@N\e(B" . "\e$,47W\e(B")
213 ("\e$,1@C@m@A\e(B" . "\e$,47^\e(B")
214 ("\e$,1@C@a\e(B" . "\e$,47i\e(B")
215
216 ("\e$,1@D\e(B" . "\e$,468\e(B")
217 ("\e$,1@D@m@D\e(B" . "\e$,47/\e(B")
218 ("\e$,1@D@m@E\e(B" . "\e$,470\e(B")
219 ("\e$,1@D@m@X\e(B" . "\e$,47U\e(B")
220 ("\e$,1@D@m@M\e(B" . "\e$,47[\e(B")
221 ("\e$,1@D@m@N\e(B" . "\e$,47_\e(B")
222
223 ("\e$,1@E\e(B" . "\e$,469\e(B")
224
225 ("\e$,1@F\e(B" . "\e$,46:\e(B")
226 ("\e$,1@F@m@F\e(B" . "\e$,471\e(B")
227 ("\e$,1@F@m@G\e(B" . "\e$,472\e(B")
228
229 ("\e$,1@G\e(B" . "\e$,46;\e(B")
230
231 ("\e$,1@H\e(B" . "\e$,46<\e(B")
232 ("\e$,1@H@a@m\e(B" . "\e$,473\e(B") ;; half consonant
233 ("\e$,1@H@m@D\e(B" . "\e$,474\e(B")
234 ("\e$,1@H@m@F\e(B" . "\e$,475\e(B")
235 ("\e$,1@H@m@H\e(B" . "\e$,476\e(B")
236 ("\e$,1@H@m@N\e(B" . "\e$,477\e(B")
237 ("\e$,1@H@m@G\e(B" . "\e$,47T\e(B")
238 ("\e$,1@H@m@E\e(B" . "\e$,47Y\e(B")
239 ("\e$,1@H@m@Q\e(B" . "\e$,47b\e(B")
240 ("\e$,1@H@a\e(B" . "\e$,47k\e(B")
241 ("\e$,1@H@m@H@a\e(B" . "\e$,47l\e(B")
242
243 ("\e$,1@J\e(B" . "\e$,46=\e(B")
244 ("\e$,1@J@m@J\e(B" . "\e$,478\e(B") ;; duplicate
245 ("\e$,1@J@m@R\e(B" . "\e$,479\e(B") ;; lakar
246
247 ("\e$,1@K\e(B" . "\e$,46>\e(B")
248
249 ("\e$,1@L\e(B" . "\e$,46?\e(B")
250 ("\e$,1@L@m@L\e(B" . "\e$,47:\e(B") ;; duplicate
251 ("\e$,1@L@m@R\e(B" . "\e$,47;\e(B") ;; lakar
252 ("\e$,1@L@m@G\e(B" . "\e$,47O\e(B")
253 ("\e$,1@L@m@F\e(B" . "\e$,47P\e(B")
254
255 ("\e$,1@M\e(B" . "\e$,46@\e(B")
256
257 ("\e$,1@N\e(B" . "\e$,46A\e(B")
258 ("\e$,1@N@m@J\e(B" . "\e$,47<\e(B")
259 ("\e$,1@N@m@N\e(B" . "\e$,47=\e(B")
260 ("\e$,1@N@m@R\e(B" . "\e$,47>\e(B") ;; lakar
261
262 ("\e$,1@O\e(B" . "\e$,46B\e(B")
263 ("\e$,1@O@m@O\e(B" . "\e$,47?\e(B") ;; duplicate
264 ("\e$,1@O@m@5@m@5\e(B" . "\e$,47m\e(B")
265
266 ("\e$,1@P\e(B" . "\e$,46C\e(B")
267 ("\e$,1@P@a@m\e(B" . "\e$,47@\e(B")
268 ("\e$,1@P@a\e(B" . "\e$,47j\e(B")
269
270 ("\e$,1@Q\e(B" . "\e$,46D\e(B")
271 ("\e$,1@Q@m\e(B" . "\e$,47@\e(B") ;; same glyph as "\e$,1@P@m\e(B"
272 ("\e$,1@Q@a@m\e(B" . "\e$,47@\e(B") ;; same glyph as "\e$,1@P@m\e(B"
273 ;;("\e$,1@Q@m@Q\e(B" . "\e$,47A\e(B")
274 ("\e$,1@Q@m@Q\e(B" . "\e$,47d\e(B")
275
276 ("\e$,1@R\e(B" . "\e$,46E\e(B")
277 ("\e$,1@R@a@m\e(B" . "\e$,47B\e(B")
278 ("\e$,1@R@m@R\e(B" . "\e$,47C\e(B") ;; lakar
279 ("\e$,1@R@m@J\e(B" . "\e$,47e\e(B")
280
281 ("\e$,1@S\e(B" . "\e$,46F\e(B")
282 ("\e$,1@S@a@m\e(B" . "\e$,47D\e(B")
283 ("\e$,1@S@m@S\e(B" . "\e$,47E\e(B")
284
285 ("\e$,1@T\e(B" . "\e$,46G\e(B")
286
287 ("\e$,1@U\e(B" . "\e$,46H\e(B")
288 ("\e$,1@U@m@U\e(B" . "\e$,47F\e(B")
289
290 ("\e$,1@V\e(B" . "\e$,46I\e(B")
291 ("\e$,1@V@m@R\e(B" . "\e$,47G\e(B")
292 ("\e$,1@V@m@V\e(B" . "\e$,47H\e(B")
293 ("\e$,1@V@m@:\e(B" . "\e$,47]\e(B")
294
295 ("\e$,1@W\e(B" . "\e$,46J\e(B")
296 ("\e$,1@W@m@?\e(B" . "\e$,47c\e(B")
297
298 ("\e$,1@X\e(B" . "\e$,46K\e(B")
299 ("\e$,1@X@m@R\e(B" . "\e$,47I\e(B")
300 ("\e$,1@X@m@X\e(B" . "\e$,47J\e(B")
301 ("\e$,1@X@m@Q@m@Q\e(B" . "\e$,47L\e(B")
302 ("\e$,1@X@m@E\e(B" . "\e$,47X\e(B")
303
304 ("\e$,1@Y\e(B" . "\e$,46L\e(B")
305 ("\e$,1@Y@m@R\e(B" . "\e$,47K\e(B")
306 ("\e$,1@Y@m@N\e(B" . "\e$,47R\e(B")
307 ("\e$,1@Y@m@H\e(B" . "\e$,47S\e(B")
308
309 ;; Dependent vowel signs
310 ("\e$,1@^\e(B" . "\e$,46M\e(B")
311 ("\e$,1@_\e(B" . "\e$,46N\e(B")
312 ("\e$,1@`\e(B" . "\e$,46O\e(B")
313 ("\e$,1@a\e(B" . "\e$,46P\e(B")
314 ("\e$,1@b\e(B" . "\e$,46Q\e(B")
315 ("\e$,1@c\e(B" . "\e$,46R\e(B")
316 ("\e$,1@f\e(B" . "\e$,46S\e(B")
317 ("\e$,1@g\e(B" . "\e$,46T\e(B")
318 ("\e$,1@h\e(B" . "\e$,46S6S\e(B")
319 ("\e$,1@j\e(B" . "\e$,46S6M\e(B")
320 ("\e$,1@k\e(B" . "\e$,46T6M\e(B")
321 ("\e$,1@l\e(B" . "\e$,46U\e(B")
322 ;; Various signs
323 ("\e$,1@m\e(B" . "\e$,46V\e(B")
324 ("\e$,1@m@O\e(B" . "\e$,46Y\e(B") ;; yakar
325 ("\e$,1@m@O@a\e(B" . "\e$,46\\e(B") ;; yakar + u
326 ("\e$,1@m@O@b\e(B" . "\e$,46]\e(B") ;; yakar + uu
327 ("\e$,1@m@U\e(B" . "\e$,46Z\e(B") ;; vakar modifier
328 ("\e$,1@m@P\e(B" . "\e$,46[\e(B") ;; rakar modifier is the same to rra modifier.
329 ("\e$,1@m@P@m\e(B" . "\e$,46R\e(B") ;; halant + rakar + halant
330 ("\e$,1@m@Q\e(B" . "\e$,46[\e(B") ;; rrakar modifier
331 ("\e$,1@m@Q@m\e(B" . "\e$,46R\e(B") ;; halant + rrakar + halant
332 ("\e$,1@m@m\e(B" . "\e$,46V\e(B") ;; double omission sign to stop forming half consonant.
333 ("\e$,1@w\e(B" . "\e$,46U\e(B") ;; not in present use, already at 0D4C.
334 ))
335
336 (defvar mlm-char-glyph-hash
337 (let* ((hash (make-hash-table :test 'equal)))
338 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
339 mlm-char-glyph)
340 hash))
341
342 (defvar mlm-char-glyph-regexp
343 (malayalam-regexp-of-hashtbl-keys mlm-char-glyph-hash))
344
345 ;; Malayalam languages needed to be reordered in a complex mannar.
346
347 (defvar mlm-consonants
348 (concat
349 "\e$,46)6*6+6,6-6.6/606162636465666768696:6;6<6=6>6?6@6A6B6C6D6E6F6G6H6I6J6K6L\e(B"
350 "\e$,47!7"7#7$7%7&7'7(7)7*7+7,7-7.7/707172737475767778797:7;7<7=7>7?7@7A7B7C7D7E7F7G7H7I7J7K7L7M7N7O7P7Q7R7S7T7U7V7W7X7Y7Z7[7\7]7^7_7`7a7b7c7d7e\e(B"
351 ))
352
353 (defvar mlm-consonants-regexp
354 (concat "\\(\e$,46[\e(B?[" mlm-consonants "][\e$,46Y6Z\e(B]?\\)"))
355
356 (defvar mlm-glyph-reorder-key-glyphs "[\e$,46[6S6T\e(B]")
357
358 (defvar mlm-glyph-reordering-regexp-list
359 `((,(concat "\\([" mlm-consonants "][\e$,46Y6Z\e(B]?\\)\e$,46[\e(B") . "\e$,46[\e(B\\1")
360 (,(concat mlm-consonants-regexp "\e$,46S6S\e(B") . "\e$,46S6S\e(B\\1")
361 (,(concat mlm-consonants-regexp "\e$,46S\e(B") . "\e$,46S\e(B\\1")
362 (,(concat mlm-consonants-regexp "\e$,46T\e(B") . "\e$,46T\e(B\\1")))
363
364 (defun malayalam-compose-syllable-string (string)
365 (with-temp-buffer
366 (insert (decompose-string string))
367 (malayalam-compose-syllable-region (point-min) (point-max))
368 (buffer-string)))
369
370 (defun malayalam-compose-syllable-region (from to)
371 "Compose malayalam syllable in region FROM to TO."
372 (let (glyph-str
373 match-str
374 glyph-reorder-regexps
375 glyph-reorder-replace
376 glyph-reorder-regexp)
377 (save-excursion
378 (save-restriction
379 (narrow-to-region from to)
380 (goto-char (point-min))
381 ;; char-glyph-conversion
382 (while (re-search-forward mlm-char-glyph-regexp nil t)
383 (setq match-str (match-string 0))
384 (setq glyph-str
385 (concat glyph-str (gethash match-str mlm-char-glyph-hash))))
386 (when (string-match mlm-glyph-reorder-key-glyphs glyph-str)
387 ;; glyph reordering
388 (setq glyph-reorder-regexps mlm-glyph-reordering-regexp-list)
389 (while glyph-reorder-regexps
390 (setq glyph-reorder-regexp (caar glyph-reorder-regexps))
391 (setq glyph-reorder-replace (cdar glyph-reorder-regexps))
392 (setq glyph-reorder-regexps (cdr glyph-reorder-regexps))
393 (if (string-match glyph-reorder-regexp glyph-str)
394 (setq glyph-str
395 (replace-match glyph-reorder-replace nil nil
396 glyph-str)))))
397 ;; concatenate and attach reference-points.
398 (setq glyph-str
399 (cdr
400 (apply
401 'nconc
402 (mapcar
403 (function
404 (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
405 glyph-str))))
406 (compose-region from to glyph-str)))))
407
408 (provide 'mlm-util)
409
410 ;;; arch-tag: 7f25ee67-8f9d-49f2-837b-35c412c00eba
411 ;;; devan-util.el ends here