Commit | Line | Data |
---|---|---|
897755c4 | 1 | ;;; devan-util.el --- Support for composing Devanagari characters -*-coding: iso-2022-7bit;-*- |
4ed46869 | 2 | |
1bec6fdb | 3 | ;; Copyright (C) 2001 Free Software Foundation, Inc. |
4ed46869 | 4 | |
1bec6fdb RS |
5 | ;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp> |
6 | ;; Keywords: multilingual, Devanagari | |
4ed46869 KH |
7 | |
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
e803d6bd KH |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 | 24 | |
1bec6fdb | 25 | ;; Created: Feb. 17. 2001 |
37cdc7ad | 26 | |
1bec6fdb | 27 | ;;; Commentary: |
4ed46869 | 28 | |
1bec6fdb RS |
29 | ;; This file provides character(Unicode) to glyph(CDAC) conversion and |
30 | ;; composition of Devanagari script characters. | |
4ed46869 KH |
31 | |
32 | ;;; Code: | |
33 | ||
1d475b5e | 34 | ;;;###autoload |
4ed46869 | 35 | |
1bec6fdb RS |
36 | ;; Devanagari Composable Pattern |
37 | ;; C .. Consonants | |
38 | ;; V .. Vowel | |
39 | ;; H .. Halant | |
40 | ;; M .. Matra | |
41 | ;; V .. Vowel | |
42 | ;; A .. Anuswar | |
43 | ;; D .. Chandrabindu | |
44 | ;; (N .. Zerowidth Non Joiner) | |
45 | ;; (J .. Zerowidth Joiner. ) | |
46 | ;; 1. vowel | |
47 | ;; V(A/D)? | |
48 | ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) | |
9e35da28 | 49 | ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? |
1bec6fdb RS |
50 | |
;; Bracket expression matching a single Devanagari consonant (the "C" of
;; the composable pattern).  `devanagari-compose-syllable-region' matches
;; it against each converted key to count the consonants of a syllable.
51 | (defconst devanagari-consonant |
52 | "[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]") | |
53 | ||
;; The regexp is a three-way alternation:
;;   1. group 1: a vowel optionally followed by one of two modifier signs
;;      (the "V(A/D)?" form);
;;   2. one stand-alone sign (presumably visarga, U+0903 -- confirm);
;;   3. group 2: up to four consonant+halant pairs, one more consonant,
;;      then optionally either a halant or an optional matra followed by
;;      an optional modifier sign.
54 | (defconst devanagari-composable-pattern |
0b520940 | 55 | (concat |
1bec6fdb RS |
56 | "\\([\e$,15E\e(B-\e$,15T6@6A\e(B][\e$,15A5B\e(B]?\\)\\|\e$,15C\e(B" |
57 | "\\|\\(" | |
58 | "\\(?:\\(?:[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?\\(?:[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?\\(?:[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\e$,16-\e(B\\)?" | |
59 | "[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]\\(?:\e$,16-\e(B\\|[\e$,15~\e(B-\e$,16-6B6C\e(B]?[\e$,15B5A\e(B]?\\)?" | |
60 | "\\)") | |
61 | "Regexp matching a composable sequence of Devanagari characters.") | |
62 | ||
(defun devanagari-compose-region (from to)
  "Compose each Devanagari syllable found between FROM and TO.
The region is scanned for `devanagari-composable-pattern'; every match
is handed to `devanagari-compose-syllable-region'.  Point and the
current restriction are restored afterwards."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward devanagari-composable-pattern nil t)
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (devanagari-compose-syllable-region syllable-start
                                              syllable-end))))))
1bec6fdb RS |
(defun devanagari-compose-string (string)
  "Return STRING with its Devanagari syllables composed.
STRING is passed through `decompose-string', inserted into a temporary
buffer and composed there with `devanagari-compose-region'."
  (with-temp-buffer
    (insert (decompose-string string))
    (let ((start (point-min))
          (end (point-max)))
      (devanagari-compose-region start end))
    (buffer-string)))
77 | ||
91c78c93 DL |
(defun devanagari-post-read-conversion (len)
  "Compose the LEN characters that follow point.
The buffer's modified flag is set back to the value it had on entry.
Return the size of the narrowed text after composition."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (devanagari-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
86 | ||
(defun devanagari-range (from to)
  "Return the ascending list of integers from FROM through TO, inclusive.
The result is nil when FROM is greater than TO."
  (let ((numbers nil)
        (n to))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq numbers (cons n numbers))
      (setq n (1- n)))
    numbers))
91 | ||
4f102de2 RS |
(defun devanagari-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, sorted from longest to shortest, and handed to
`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key val) (setq keys (cons key keys))) hashtbl)
    (regexp-opt
     (sort keys
           (lambda (a b) (> (length a) (length b)))))))
101 | ||
(defun devanagari-composition-function (from to pattern &optional string)
  "Compose Devanagari characters in REGION, or STRING if specified.
The REGION (FROM to TO) or STRING is assumed to match the composable
PATTERN regexp completely; PATTERN itself is not consulted here.
Return TO minus FROM."
  (cond (string
         (devanagari-compose-syllable-string string))
        (t
         (devanagari-compose-syllable-region from to)))
  (- to from))
109 | ||
110 | ;; Register a function to compose Devanagari characters. | |
;; Install `devanagari-composition-function', keyed by
;; `devanagari-composable-pattern', for #x0903, #x0905..#x0939 and
;; #x0958..#x0961.  The code list is built with `append' on fresh lists;
;; the previous `nconc' destructively modified the quoted constant
;; '(#x0903), which must not be altered.
(dolist (ucs (append (list #x0903)
                     (devanagari-range #x0905 #x0939)
                     (devanagari-range #x0958 #x0961)))
  (aset composition-function-table ucs
        (list (cons devanagari-composable-pattern
                    'devanagari-composition-function))))
1bec6fdb RS |
117 | |
118 | ;; Notes on conversion steps. | |
119 | ||
0b520940 | 120 | ;; 1. chars to glyphs |
1bec6fdb RS |
121 | ;; |
122 | ;; Rules will not be applied to a halant that appears at the end of the | |
123 | ;; text. Also, a preceding/following "r" is treated as a special case. | |
124 | ||
125 | ;; 2. glyphs reordering. | |
126 | ;; | |
127 | ;; The glyphs are split by halant, and each glyph group is | |
128 | ;; re-ordered in the following order. | |
129 | ;; | |
130 | ;; Note that `consonant-glyph' mentioned here does not contain the | |
131 | ;; vertical bar (right modifier) attached at the right of the | |
132 | ;; consonant. | |
0b520940 DL |
133 | ;; |
134 | ;; If the glyph-group contains right modifier, | |
1bec6fdb RS |
135 | ;; (1) consonant-glyphs/vowels, with nukta sign |
136 | ;; (2) spacing | |
137 | ;; (3) right modifier (may be matra) | |
138 | ;; (4) top matra | |
139 | ;; (5) preceding "r" | |
140 | ;; (6) anuswar | |
141 | ;; (7) following "r" | |
142 | ;; (8) bottom matra or halant. | |
0b520940 DL |
143 | ;; |
144 | ;; Otherwise, | |
1bec6fdb RS |
145 | ;; (1) consonant-glyph/vowels, with nukta sign |
146 | ;; (3) left matra | |
0b520940 | 147 | ;; (4) top matra |
1bec6fdb RS |
148 | ;; (5) preceding "r" |
149 | ;; (6) anuswar | |
150 | ;; (7) following "r" | |
151 | ;; (8) bottom matra or halant. | |
152 | ;; (2) spacing | |
153 | ||
154 | ;; 3. glyph to glyph | |
155 | ;; | |
156 | ;; For better display, some glyph sequences are replaced by tuned glyphs. | |
157 | ||
158 | ;; 4. Composition. | |
159 | ;; | |
160 | ;; left modifiers will be attached at the left. | |
161 | ;; others will be attached right. | |
162 | ||
163 | ;; Problem:: | |
164 | ;; Can we generalize this method to other Indian scripts? | |
165 | ||
77539682 KH |
;; Alist of (CHARACTER-SEQUENCE . GLYPH-SEQUENCE) string pairs.  The
;; entries are loaded into `dev-char-glyph-hash', whose keys also form
;; `dev-char-glyph-regexp'.
166 | (defvar dev-char-glyph |
167 | '(("\e$,15E\e(B" . "\e$,4 K\e(B") |
1d475b5e KH |
168 | ("\e$,15F\e(B" . "\e$,4 K")\e(B") |
169 | ("\e$,15~\e(B" . "\e$,4")\e(B") | |
77539682 | 170 | ("\e$,15G\e(B" . "\e$,4 \\e(B") |
1d475b5e KH |
171 | ("\e$,15\7f\e(B" . "\e$,4"*\e(B") |
172 | ("\e$,15\7f5A\e(B" . "\e$,4"*\e(B\\e$,4"&\e(B") | |
77539682 | 173 | ("\e$,15H\e(B" . "\e$,4 \"'\e(B") |
1d475b5e KH |
174 | ("\e$,15H5A\e(B" . "\e$,4 \"'"&\e(B") |
175 | ("\e$,16 \e(B" . "\e$,4"2\e(B") | |
176 | ("\e$,16 5A\e(B" . "\e$,4"2"&\e(B") | |
77539682 | 177 | ("\e$,15I\e(B" . "\e$,4 ]\e(B") |
1d475b5e KH |
178 | ("\e$,16!\e(B" . "\e$,4"6\e(B") |
179 | ("\e$,15J\e(B" . "\e$,4 ^"P\e(B") | |
180 | ("\e$,16"\e(B" . "\e$,4":\e(B") | |
181 | ("\e$,15K\e(B" . "\e$,4 `"Q\e(B") | |
182 | ("\e$,16#\e(B" . "\e$,4">\e(B") | |
77539682 | 183 | ;;("\e$,15L\e(B" . nil) ; not implemented. |
1d475b5e KH |
184 | ("\e$,16$\e(B" . "\e$,4"?\e(B") |
185 | ("\e$,15M\e(B" . "\e$,4 b"L\e(B") | |
186 | ("\e$,15M5A\e(B" . "\e$,4 b"$\e(B") | |
187 | ("\e$,15M5B\e(B" . "\e$,4 b"$\e(B") | |
188 | ("\e$,16%\e(B" . "\\e$,4"L\e(B") | |
0b520940 DL |
189 | ("\e$,15N\e(B" . "\e$,4 b"@\e(B") |
190 | ("\e$,15N5A\e(B" . "\e$,4 b"@"&\e(B") | |
1d475b5e KH |
191 | ("\e$,16&\e(B" . "\\e$,4"@\e(B") |
192 | ("\e$,16&5A\e(B" . "\\e$,4"@\e(B\\e$,4"&\e(B") | |
0b520940 | 193 | ("\e$,15O\e(B" . "\e$,4 b\e(B") |
1d475b5e KH |
194 | ("\e$,16'\e(B" . "\\e$,4"D\e(B") |
195 | ("\e$,16'5A\e(B" . "\\e$,4"D\e(B\\e$,4"&\e(B") | |
0b520940 DL |
196 | ("\e$,15P\e(B" . "\e$,4 b"D\e(B") |
197 | ("\e$,15P5A\e(B" . "\e$,4 b"D"&\e(B") | |
1d475b5e KH |
198 | ("\e$,16(\e(B" . "\\e$,4"H\e(B") |
199 | ("\e$,16(5A\e(B" . "\\e$,4"H\e(B\\e$,4"&\e(B") | |
200 | ("\e$,15Q\e(B" . "\e$,4 K")"L\e(B") ;; special rule for reordering. | |
201 | ("\e$,15Q5A\e(B" . "\e$,4 K")"$\e(B") | |
202 | ("\e$,15Q5B\e(B" . "\e$,4 K")"$\e(B") | |
203 | ("\e$,16)\e(B" . "\\e$,4")"L\e(B") | |
204 | ("\e$,16)5A\e(B" . "\\e$,4")"$\e(B") | |
205 | ("\e$,16)5B\e(B" . "\\e$,4")"$\e(B") | |
0b520940 DL |
206 | ("\e$,15R\e(B" . "\e$,4 K")"@\e(B") |
207 | ("\e$,15R5A\e(B" . "\e$,4 K")"@"&\e(B") | |
1d475b5e KH |
208 | ("\e$,16*\e(B" . "\\e$,4")"@\e(B") |
209 | ("\e$,16*5A\e(B" . "\\e$,4")"@"&\e(B") | |
210 | ("\e$,15S\e(B" . "\e$,4 K")"D\e(B") | |
211 | ("\e$,15S5A\e(B" . "\e$,4 K")"D"&\e(B") | |
212 | ("\e$,16+\e(B" . "\\e$,4")"D\e(B") | |
213 | ("\e$,16+5A\e(B" . "\\e$,4")"D"&\e(B") | |
0b520940 DL |
214 | ("\e$,15T\e(B" . "\e$,4 K")"H\e(B") |
215 | ("\e$,15T5A\e(B" . "\e$,4 K")"H"&\e(B") | |
1d475b5e KH |
216 | ("\e$,16,\e(B" . "\\e$,4")"H\e(B") |
217 | ("\e$,16,5A\e(B" . "\\e$,4")"H"&\e(B") | |
0b520940 | 218 | ("\e$,16@\e(B" . "\e$,4 a"Q\e(B") |
77539682 | 219 | ;;("\e$,16B\e(B" . nil) |
0b520940 | 220 | ;;("\e$,16A\e(B" . nil) |
77539682 KH |
221 | ;;("\e$,16C\e(B" . nil) |
222 | ||
223 | ;; GUTTURALS | 
1d475b5e | 224 | ("\e$,15U\e(B" . "\e$,4 e"R\e(B") |
77539682 | 225 | ("\e$,15U6-\e(B" . "\e$,4 c\e(B") |
1d475b5e KH |
226 | ("\e$,15U6-5p\e(B" . "\e$,4 g"R\e(B") |
227 | ("\e$,15U6-5d\e(B" . "\e$,4 h"R\e(B") | |
228 | ("\e$,15U6-5w\e(B" . "\e$,4 i")\e(B") | |
77539682 KH |
229 | ("\e$,15U6-5w6-\e(B" . "\e$,4 i\e(B") |
230 | ||
1d475b5e | 231 | ("\e$,15V\e(B" . "\e$,4 j")\e(B") |
77539682 | 232 | ("\e$,15V6-\e(B" . "\e$,4 j\e(B") |
1d475b5e | 233 | ("\e$,15V6-5p\e(B" . "\e$,4 l")\e(B") |
77539682 KH |
234 | ("\e$,15V6-5p6-\e(B" . "\e$,4 l\e(B") |
235 | ||
0b520940 DL |
236 | ("\e$,15W\e(B" . "\e$,4 m")\e(B") |
237 | ("\e$,15W6-\e(B" . "\e$,4 m\e(B") | |
1d475b5e | 238 | ("\e$,15W6-5p\e(B" . "\e$,4 o")\e(B") |
77539682 KH |
239 | ("\e$,15W6-5p6-\e(B" . "\e$,4 o\e(B") |
240 | ||
0b520940 DL |
241 | ("\e$,15X\e(B" . "\e$,4 p")\e(B") |
242 | ("\e$,15X6-\e(B" . "\e$,4 p\e(B") | |
243 | ("\e$,15X6-5p\e(B" . "\e$,4 q")\e(B") | |
244 | ("\e$,15X6-5p6-\e(B" . "\e$,4 q\e(B") | |
77539682 | 245 | |
1d475b5e | 246 | ("\e$,15Y\e(B" . "\e$,4 r"S\e(B") |
0b520940 DL |
247 | ;; PALATALS |
248 | ("\e$,15Z\e(B" . "\e$,4 s")\e(B") | |
249 | ("\e$,15Z6-\e(B" . "\e$,4 s\e(B") | |
250 | ("\e$,15Z6-5p\e(B" . "\e$,4 t")\e(B") | |
77539682 KH |
251 | ("\e$,15Z6-5p6-\e(B" . "\e$,4 t\e(B") |
252 | ||
0b520940 | 253 | ("\e$,15[\e(B" . "\e$,4 u"T\e(B") |
77539682 | 254 | |
0b520940 DL |
255 | ("\e$,15\\e(B" . "\e$,4 v")\e(B") |
256 | ("\e$,15\6-\e(B" . "\e$,4 v\e(B") | |
257 | ("\e$,15\6-5p\e(B" . "\e$,4 x")\e(B") | |
258 | ("\e$,15\6-5p6-\e(B" . "\e$,4 x\e(B") | |
259 | ("\e$,15\6-5^\e(B" . "\e$,4 y")\e(B") | |
260 | ("\e$,15\6-5^6-\e(B" . "\e$,4 y\e(B") | |
77539682 | 261 | |
0b520940 DL |
262 | ("\e$,15]\e(B" . "\e$,4 z")\e(B") |
263 | ("\e$,15]6-\e(B" . "\e$,4 z\e(B") | |
264 | ("\e$,15]6-5p\e(B" . "\e$,4 {")\e(B") | |
265 | ("\e$,15]6-5p6-\e(B" . "\e$,4 {\e(B") | |
77539682 | 266 | |
1d475b5e | 267 | ("\e$,15^\e(B" . "\e$,4 |")\e(B") |
77539682 | 268 | ("\e$,15^6-\e(B" . "\e$,4 |\e(B") |
0b520940 | 269 | ;; CEREBRALS |
1d475b5e KH |
270 | ("\e$,15_\e(B" . "\e$,4 }"U\e(B") |
271 | ("\e$,15_6-5_\e(B" . "\e$,4 ~"U\e(B") | |
272 | ("\e$,15_6-5`\e(B" . "\e$,4 \7f"U\e(B") | |
77539682 | 273 | |
0b520940 DL |
274 | ("\e$,15`\e(B" . "\e$,4! "V\e(B") |
275 | ("\e$,15`6-5`\e(B" . "\e$,4!!"V\e(B") | |
77539682 | 276 | |
0b520940 DL |
277 | ("\e$,15a\e(B" . "\e$,4!""W\e(B") |
278 | ("\e$,15a6-5a\e(B" . "\e$,4!$"W\e(B") | |
279 | ("\e$,15a6-5b\e(B" . "\e$,4!%"W\e(B") | |
77539682 | 280 | |
0b520940 | 281 | ("\e$,15b\e(B" . "\e$,4!&"X\e(B") |
77539682 | 282 | |
1d475b5e | 283 | ("\e$,15c\e(B" . "\e$,4!(")\e(B") |
77539682 | 284 | ("\e$,15c6-\e(B" . "\e$,4!(\e(B") |
0b520940 DL |
285 | ;; DENTALS |
286 | ("\e$,15d\e(B" . "\e$,4!)")\e(B") | |
287 | ("\e$,15d6-\e(B" . "\e$,4!)\e(B") | |
288 | ("\e$,15d6-5p\e(B" . "\e$,4!*")\e(B") | |
289 | ("\e$,15d6-5p6-\e(B" . "\e$,4!*\e(B") | |
290 | ("\e$,15d6-5d\e(B" . "\e$,4!+")\e(B") | |
291 | ("\e$,15d6-5d6-\e(B" . "\e$,4!+\e(B") | |
292 | ||
293 | ("\e$,15e\e(B" . "\e$,4!,")\e(B") | |
294 | ("\e$,15e6-\e(B" . "\e$,4!,\e(B") | |
295 | ("\e$,15e6-5p\e(B" . "\e$,4!-")\e(B") | |
296 | ("\e$,15e6-5p6-\e(B" . "\e$,4!-\e(B") | |
297 | ||
298 | ("\e$,15f\e(B" . "\e$,4!."Y\e(B") | |
1d475b5e KH |
299 | ("\e$,15f6#\e(B" . "\e$,4!/"Y\e(B") |
300 | ("\e$,15f6-5p\e(B" . "\e$,4!0"Y\e(B") | |
301 | ("\e$,15f6-5f\e(B" . "\e$,4!1"Y\e(B") | |
302 | ("\e$,15f6-5g\e(B" . "\e$,4!2"Y\e(B") | |
77539682 KH |
303 | ("\e$,15f6-5n\e(B" . "\e$,4!3\e(B") |
304 | ("\e$,15f6-5o\e(B" . "\e$,4!4\e(B") | |
1d475b5e | 305 | ("\e$,15f6-5u\e(B" . "\e$,4!5"Y\e(B") |
77539682 | 306 | |
0b520940 DL |
307 | ("\e$,15g\e(B" . "\e$,4!6")\e(B") |
308 | ("\e$,15g6-\e(B" . "\e$,4!6\e(B") | |
309 | ("\e$,15g6-5p\e(B" . "\e$,4!7")\e(B") | |
310 | ("\e$,15g6-5p6-\e(B" . "\e$,4!7\e(B") | |
77539682 | 311 | |
0b520940 DL |
312 | ("\e$,15h\e(B" . "\e$,4!8")\e(B") |
313 | ("\e$,15h6-\e(B" . "\e$,4!8\e(B") | |
314 | ("\e$,15h6-5p\e(B" . "\e$,4!9")\e(B") | |
315 | ("\e$,15h6-5p6-\e(B" . "\e$,4!9")\e(B") | |
316 | ("\e$,15h6-5h\e(B" . "\e$,4!:")\e(B") | |
317 | ("\e$,15h6-5h6-\e(B" . "\e$,4!:\e(B") | |
77539682 | 318 | |
1d475b5e | 319 | ("\e$,15i\e(B" . "\e$,4!8"#")\e(B") |
0b520940 DL |
320 | ;; LABIALS |
321 | ("\e$,15j\e(B" . "\e$,4!;")\e(B") | |
322 | ("\e$,15j6-\e(B" . "\e$,4!;\e(B") | |
323 | ("\e$,15j6-5p\e(B" . "\e$,4!<")\e(B") | |
324 | ("\e$,15j6-5p6-\e(B" . "\e$,4!<\e(B") | |
325 | ||
326 | ("\e$,15k\e(B" . "\e$,4!a"[\e(B") | |
327 | ("\e$,15k6-\e(B" . "\e$,4!=\e(B") | |
328 | ("\e$,15k6-5p\e(B" . "\e$,4!c"[\e(B") | |
329 | ||
330 | ("\e$,15l\e(B" . "\e$,4!d")\e(B") | |
331 | ("\e$,15l6-\e(B" . "\e$,4!d\e(B") | |
332 | ("\e$,15l6-5p\e(B" . "\e$,4!e")\e(B") | |
333 | ("\e$,15l6-5p6-\e(B" . "\e$,4!e\e(B") | |
334 | ||
335 | ("\e$,15m\e(B" . "\e$,4!f")\e(B") | |
336 | ("\e$,15m6-\e(B" . "\e$,4!f\e(B") | |
337 | ("\e$,15m6-5p\e(B" . "\e$,4!g")\e(B") | |
338 | ("\e$,15m6-5p6-\e(B" . "\e$,4!g\e(B") | |
77539682 | 339 | |
1d475b5e | 340 | ("\e$,15n\e(B" . "\e$,4!h")\e(B") |
77539682 | 341 | ("\e$,15n6-\e(B" . "\e$,4!h\e(B") |
1d475b5e | 342 | ("\e$,15n6-5p\e(B" . "\e$,4!i")\e(B") |
77539682 KH |
343 | ("\e$,15n6-5p6-\e(B" . "\e$,4!i\e(B") |
344 | ;; SEMIVOWELS | |
0b520940 DL |
345 | ("\e$,15o\e(B" . "\e$,4!j")\e(B") |
346 | ("\e$,15o6-\e(B" . "\e$,4!j\e(B") | |
347 | ("\e$,15o6-5p\e(B" . "\e$,4!k")\e(B") | |
348 | ("\e$,15o6-5p6-\e(B" . "\e$,4!k\e(B") | |
77539682 KH |
349 | ("\e$,16-5o\e(B" . "\e$,4!l\e(B") ;; when every other lig. fails. |
350 | ||
0b520940 | 351 | ("\e$,15p\e(B" . "\e$,4!n"W\e(B") |
77539682 | 352 | ;; ("\e$,15p6-\e(B" . "\\e$,4"'\e(B") ;; special case. only the topmost pos. |
0b520940 | 353 | ("\e$,15q\e(B" . "\e$,4!n"#"W\e(B") |
77539682 | 354 | ("\e$,15q6-\e(B" . "\e$,4!m\e(B") ;; IS 13194 special rule. |
0b520940 DL |
355 | ("\e$,15p6!\e(B" . "\e$,4!o"[\e(B") |
356 | ("\e$,15p6"\e(B" . "\e$,4!p"\\e(B") | |
77539682 | 357 | |
0b520940 DL |
358 | ("\e$,15r\e(B" . "\e$,4!q")\e(B") |
359 | ("\e$,15r6-\e(B" . "\e$,4!q\e(B") | |
360 | ("\e$,15s\e(B" . "\e$,4!s\e(B") | |
361 | ("\e$,15s6-\e(B" . "\e$,4!r\e(B") | |
1d475b5e KH |
362 | ("\e$,15t\e(B" . "\e$,4!s"#\e(B") |
363 | ("\e$,15t6-\e(B" . "\e$,4!r"#\e(B") | |
77539682 | 364 | |
1d475b5e | 365 | ("\e$,15u\e(B" . "\e$,4!t")\e(B") |
77539682 | 366 | ("\e$,15u6-\e(B" . "\e$,4!t\e(B") |
1d475b5e | 367 | ("\e$,15u6-5p\e(B" . "\e$,4!u")\e(B") |
77539682 | 368 | ("\e$,15u6-5p6-\e(B" . "\e$,4!u\e(B") |
0b520940 DL |
369 | ;; SIBILANTS |
370 | ("\e$,15v\e(B" . "\e$,4!v")\e(B") | |
77539682 | 371 | ("\e$,15v6-\e(B" . "\e$,4!v\e(B") |
1d475b5e | 372 | ("\e$,15v6-5u\e(B" . "\e$,4!w")\e(B") |
77539682 | 373 | ("\e$,15v6-5u6-\e(B" . "\e$,4!w\e(B") |
1d475b5e | 374 | ("\e$,15v6-5p\e(B" . "\e$,4!x")\e(B") |
77539682 KH |
375 | ("\e$,15v6-5p6-\e(B" . "\e$,4!x\e(B") |
376 | ||
1d475b5e | 377 | ("\e$,15w\e(B" . "\e$,4!y")\e(B") |
77539682 | 378 | ("\e$,15w6-\e(B" . "\e$,4!y\e(B") |
1d475b5e | 379 | ("\e$,15x\e(B" . "\e$,4!z")\e(B") |
77539682 | 380 | ("\e$,15x6-\e(B" . "\e$,4!z\e(B") |
1d475b5e | 381 | ("\e$,15x6-5p\e(B" . "\e$,4!{")\e(B") |
77539682 KH |
382 | ("\e$,15x6-5p6-\e(B" . "\e$,4!{\e(B") |
383 | ||
384 | ("\e$,15y\e(B" . "\e$,4!}\e(B") | |
385 | ("\e$,15y6-\e(B" . "\e$,4!|\e(B") | |
386 | ("\e$,15y6#\e(B" . "\e$,4!~\e(B") | |
387 | ("\e$,15y6-5p\e(B" . "\e$,4!\7f\e(B") | |
1d475b5e KH |
388 | ("\e$,15y6-5n\e(B" . "\e$,4" \e(B") |
389 | ("\e$,15y6-5o\e(B" . "\e$,4"!\e(B") | |
0b520940 | 390 | ;; NUKTAS |
1d475b5e | 391 | ("\e$,168\e(B" . "\e$,4 f"R"S\e(B") |
0b520940 DL |
392 | ("\e$,1686-\e(B" . "\e$,4 d\e(B") |
393 | ("\e$,169\e(B" . "\e$,4 k")\e(B") | |
394 | ("\e$,1696-\e(B" . "\e$,4 k\e(B") | |
395 | ("\e$,16:\e(B" . "\e$,4 n")\e(B") | |
396 | ("\e$,16:6-\e(B" . "\e$,4 n\e(B") | |
397 | ("\e$,16;\e(B" . "\e$,4 w")\e(B") | |
398 | ("\e$,16;6-\e(B" . "\e$,4 w\e(B") | |
399 | ("\e$,16<\e(B" . "\e$,4!#"W\e(B") | |
400 | ("\e$,16=\e(B" . "\e$,4!'"X\e(B") | |
401 | ("\e$,16>\e(B" . "\e$,4!b"[\e(B") | |
402 | ("\e$,16>6-\e(B" . "\e$,4!>\e(B") | |
1d475b5e | 403 | ("\e$,16?\e(B" . "\e$,4!j"#")\e(B") |
77539682 | 404 | ;; misc modifiers. |
1d475b5e | 405 | ("\e$,15A\e(B" . "\\e$,4"$\e(B") |
0b520940 | 406 | ("\e$,15B\e(B" . "\\e$,4"&\e(B") |
77539682 | 407 | ("\e$,15C\e(B" . "\e$,4 F\e(B") |
1d475b5e | 408 | ("\e$,15|\e(B" . "\e$,4"#\e(B") |
77539682 | 409 | ("\e$,15}\e(B" . "\e$,4 E\e(B") |
1d475b5e KH |
410 | ("\e$,16-\e(B" . "\e$,4""\e(B") |
411 | ("\e$,16-5p\e(B" . "\e$,4"%\e(B") ;; following "r" | |
0b520940 | 412 | ;; ("\e$,160\e(B" . "\e$,4 D\e(B") |
77539682 | 413 | ;; ("\e$,16D\e(B" . "\e$,4 J\e(B") |
0b520940 DL |
414 | ;; ("\e$,16F\e(B" . "") |
415 | ;; ("\e$,16G\e(B" . "") | |
416 | ;; ("\e$,16H\e(B" . "") | |
417 | ;; ("\e$,16I\e(B" . "") | |
418 | ;; ("\e$,16J\e(B" . "") | |
419 | ;; ("\e$,16K\e(B" . "") | |
420 | ;; ("\e$,16L\e(B" . "") | |
421 | ;; ("\e$,16M\e(B" . "") | |
422 | ;; ("\e$,16N\e(B" . "") | |
77539682 KH |
423 | ;; ("\e$,16O\e(B" . "") |
424 | ) | |
0b520940 | 425 | "Devanagari characters to glyphs conversion table. |
77539682 KH |
426 | Default value contains only the basic rules. You may add your own |
427 | preferred rule from the sanskrit fonts." ) | |
1bec6fdb RS |
428 | |
(defvar dev-char-glyph-hash
  ;; `make-hash-table' replaces the obsolete `makehash'; string keys
  ;; need the `equal' test.
  (let ((hash (make-hash-table :test 'equal)))
    (dolist (rule dev-char-glyph)
      (puthash (car rule) (cdr rule) hash))
    hash)
  "Hash table holding the entries of `dev-char-glyph'.
Each key is the car of an entry and each value is its cdr.")

(defvar dev-char-glyph-regexp
  (devanagari-regexp-of-hashtbl-keys dev-char-glyph-hash)
  "Regexp matching any key of `dev-char-glyph-hash'.")
1bec6fdb RS |
437 | |
438 | ;; glyph-to-glyph conversion table. | |
0b520940 | 439 | ;; It is assumed that glyphs are ordered as |
1bec6fdb RS |
440 | ;; [consonant/nukta] - [matra/halant] - [preceding-r] - [anuswar]. |
441 | ||
;; Alist of (GLYPH-SEQUENCE . REPLACEMENT) string pairs, loaded into
;; `dev-glyph-glyph-hash'.  `devanagari-compose-syllable-region' replaces
;; the first match of `dev-glyph-glyph-regexp' in the glyph string with
;; the value stored for it.
77539682 | 442 | (defvar dev-glyph-glyph |
1d475b5e KH |
443 | '(("\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"(\e(B") |
444 | ("\\e$,4"'\e(B\\e$,4"$\e(B" . "\\e$,4"(\e(B") | |
445 | ("\e$,4"*\e(B\\e$,4"&\e(B" . "\e$,4"+\e(B") | |
446 | ("\e$,4"*\e(B\\e$,4"'\e(B" . "\e$,4",\e(B") | |
447 | ("\e$,4"*\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\e$,4"-\e(B") | |
448 | ("\e$,4"2\e(B\\e$,4"&\e(B" . "\e$,4"3\e(B") | |
449 | ("\e$,4"2\e(B\\e$,4"'\e(B" . "\e$,4"4\e(B") | |
450 | ("\e$,4"2\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\e$,4"5\e(B") | |
451 | ("\e$,4"#\e(B\\e$,4"6\e(B" . "\e$,4"7\e(B") | |
452 | ("\e$,4"%\e(B\\e$,4"6\e(B" . "\e$,4"8\e(B") | |
453 | ;;("\e$,4"6\e(B" . "\e$,4"9\e(B") | |
454 | ("\e$,4"#\e(B\\e$,4":\e(B" . "\e$,4";\e(B") | |
455 | ("\e$,4"%\e(B\\e$,4":\e(B" . "\e$,4"<\e(B") | |
456 | ;;("\e$,4":\e(B" . "\e$,4"=\e(B") | |
457 | ("\\e$,4"@\e(B\\e$,4"&\e(B" . "\\e$,4"A\e(B") | |
458 | ("\\e$,4"@\e(B\\e$,4"'\e(B" . "\\e$,4"B\e(B") | |
459 | ("\\e$,4"@\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"C\e(B") | |
460 | ("\\e$,4"D\e(B\\e$,4"&\e(B" . "\\e$,4"E\e(B") | |
461 | ("\\e$,4"D\e(B\\e$,4"'\e(B" . "\\e$,4"F\e(B") | |
462 | ("\\e$,4"D\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"G\e(B") | |
463 | ("\\e$,4"H\e(B\\e$,4"&\e(B" . "\\e$,4"I\e(B") | |
464 | ("\\e$,4"H\e(B\\e$,4"'\e(B" . "\\e$,4"J\e(B") | |
465 | ("\\e$,4"H\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"K\e(B") | |
466 | ("\\e$,4"L\e(B\\e$,4"&\e(B" . "\\e$,4"M\e(B") | |
467 | ("\\e$,4"L\e(B\\e$,4"'\e(B" . "\\e$,4"N\e(B") | |
468 | ("\\e$,4"L\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"O\e(B") | |
77539682 | 469 | )) |
1bec6fdb RS |
(defvar dev-glyph-glyph-hash
  ;; `make-hash-table' replaces the obsolete `makehash'; string keys
  ;; need the `equal' test.
  (let ((hash (make-hash-table :test 'equal)))
    (dolist (rule dev-glyph-glyph)
      (puthash (car rule) (cdr rule) hash))
    hash)
  "Hash table holding the entries of `dev-glyph-glyph'.
Each key is the car of an entry and each value is its cdr.")

(defvar dev-glyph-glyph-regexp
  (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-hash)
  "Regexp matching any key of `dev-glyph-glyph-hash'.")
1bec6fdb RS |
477 | |
478 | ||
479 | ;; yet another glyph-to-glyph conversions. | |
;; Second alist of (GLYPH . REPLACEMENT) string pairs, loaded into
;; `dev-glyph-glyph-2-hash'.  `devanagari-compose-syllable-region'
;; consults it only after a `dev-glyph-glyph' replacement was made and
;; only when the syllable contains more than one consonant.
77539682 | 480 | (defvar dev-glyph-glyph-2 |
1d475b5e KH |
481 | '(("\e$,4"*\e(B" . "\e$,4".\e(B") |
482 | ("\e$,4"+\e(B" . "\e$,4"/\e(B") | |
483 | ("\e$,4",\e(B" . "\e$,4"0\e(B") | |
484 | ("\e$,4"-\e(B" . "\e$,4"1\e(B"))) | |
1bec6fdb RS |
(defvar dev-glyph-glyph-2-hash
  ;; `make-hash-table' replaces the obsolete `makehash'; string keys
  ;; need the `equal' test.
  (let ((hash (make-hash-table :test 'equal)))
    (dolist (rule dev-glyph-glyph-2)
      (puthash (car rule) (cdr rule) hash))
    hash)
  "Hash table holding the entries of `dev-glyph-glyph-2'.
Each key is the car of an entry and each value is its cdr.")

(defvar dev-glyph-glyph-2-regexp
  (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)
  "Regexp matching any key of `dev-glyph-glyph-2-hash'.")
1bec6fdb RS |
492 | |
493 | ||
(defun dev-charseq (from &optional to)
  "Return the list of `devanagari-glyph' characters for codes FROM to TO.
Both codes are turned into characters with `make-char' and the range
between them is enumerated by `devanagari-range'.  When TO is omitted
or nil, FROM is used for both ends."
  (let ((last-code (or to from)))
    (devanagari-range (make-char 'devanagari-glyph from)
                      (make-char 'devanagari-glyph last-code))))
1bec6fdb RS |
498 | |
(defvar dev-glyph-cvn
  (append (dev-charseq #x2b)
          (dev-charseq #x3c #xc1)
          (dev-charseq #xc3))
  "List of Devanagari consonant, vowel and nukta glyphs.")

(defvar dev-glyph-space
  (dev-charseq #xf0 #xfe)
  "List of Devanagari spacing glyphs.")

(defvar dev-glyph-right-modifier
  (append (dev-charseq #xc9)
          (dev-charseq #xd2 #xd5))
  "List of Devanagari modifier glyphs attached at the right side.")

(defvar dev-glyph-right-modifier-regexp
  (concat "[" dev-glyph-right-modifier "]"))

(defvar dev-glyph-left-matra
  (dev-charseq #xca #xd1)
  "List of Devanagari matra glyphs attached at the left side.")

(defvar dev-glyph-top-matra
  (dev-charseq #xe0 #xef)
  "List of Devanagari matra glyphs attached at the top side.")

(defvar dev-glyph-bottom-modifier
  (append (dev-charseq #xd6 #xdf)
          (dev-charseq #xc2))
  "List of Devanagari modifier glyphs attached at the bottom.")

;; Each element is (GLYPH-LIST . ORDER); ORDER becomes the
;; `composition-order' property of every glyph in GLYPH-LIST.
(defvar dev-glyph-order
  (list (cons dev-glyph-cvn 1)
        (cons dev-glyph-space 2)
        (cons dev-glyph-right-modifier 3)
        (cons dev-glyph-left-matra 3)   ; processed by reference point.
        (cons dev-glyph-top-matra 4)
        (cons (dev-charseq #xc7 #xc8) 5)
        (cons (dev-charseq #xc6) 6)
        (cons (dev-charseq #xc5) 7)
        (cons dev-glyph-bottom-modifier 8)))
543 | ||
;; Record the `composition-order' of every glyph listed in
;; `dev-glyph-order'.
(dolist (entry dev-glyph-order)
  (let ((glyphs (car entry)))
    (dolist (glyph glyphs)
      (put-char-code-property glyph 'composition-order (cdr entry)))))

;; Left matras get their own `reference-point'.
(dolist (glyph dev-glyph-left-matra)
  (put-char-code-property glyph 'reference-point '(3 . 5)))
556 | ||
(defun devanagari-compose-syllable-string (string)
  "Return STRING composed as one Devanagari syllable.
STRING is passed through `decompose-string', inserted into a temporary
buffer and composed there with `devanagari-compose-syllable-region'."
  (with-temp-buffer
    (insert (decompose-string string))
    (let ((start (point-min))
          (end (point-max)))
      (devanagari-compose-syllable-region start end))
    (buffer-string)))
562 | ||
77539682 KH |
;; Compose one syllable.  The characters between FROM and TO are
;; converted to a glyph string through `dev-char-glyph-hash', tuned with
;; the glyph-to-glyph tables, reordered group by group according to the
;; `composition-order' property, interleaved with reference points, and
;; finally handed to `compose-region' for FROM..TO.
563 | (defun devanagari-compose-syllable-region (from to) |
564 | "Compose devanagari syllable in region FROM to TO." | |
565 | (let ((glyph-str nil) (cons-num 0) glyph-str-list | |
0b520940 DL |
566 | (last-halant nil) (preceding-r nil) (last-modifier nil) |
567 | (last-char (char-before to)) match-str | |
568 | glyph-block split-pos) | |
77539682 KH |
569 | (save-excursion |
570 | (save-restriction | |
0b520940 DL |
571 | ;;; *** char-to-glyph conversion *** |
572 | ;; Special rule 1. -- Last halant must be preserved. | |
;; The trailing halant is excluded from the narrowed region here; its
;; glyph is appended to glyph-str after the table conversion below.
573 | (if (eq last-char ?\e$,16-\e(B) | |
574 | (progn | |
575 | (setq last-halant t) | |
576 | (narrow-to-region from (1- to))) | |
577 | (narrow-to-region from to) | |
578 | ;; note if the last char is modifier. | |
579 | (if (or (eq last-char ?\e$,15A\e(B) (eq last-char ?\e$,15B\e(B)) | |
580 | (setq last-modifier t))) | |
581 | (goto-char (point-min)) | |
582 | ;; Special rule 2. -- preceding "r halant" must be modifier. | |
;; The two leading characters are skipped so that the table lookup below
;; never sees them; their glyph is added once the loop has finished.
583 | (when (looking-at "\e$,15p6-\e(B.") | |
584 | (setq preceding-r t) | |
585 | (goto-char (+ 2 (point)))) | |
586 | ;; translate the rest characters into glyphs | |
587 | (while (re-search-forward dev-char-glyph-regexp nil t) | |
588 | (setq match-str (match-string 0)) | |
589 | (setq glyph-str | |
590 | (concat glyph-str | |
591 | (gethash match-str dev-char-glyph-hash))) | |
592 | ;; count the number of consonant-glyphs. | |
593 | (if (string-match devanagari-consonant match-str) | |
594 | (setq cons-num (1+ cons-num)))) | |
595 | ;; preceding-r must be attached before the anuswar if exists. | |
;; With a trailing modifier the glyph is inserted in front of the last
;; glyph of glyph-str; otherwise it is appended.
596 | (if preceding-r | |
597 | (if last-modifier | |
598 | (setq glyph-str (concat (substring glyph-str 0 -1) | |
599 | "\e$,4"'\e(B" (substring glyph-str -1))) | |
600 | (setq glyph-str (concat glyph-str "\e$,4"'\e(B")))) | |
601 | (if last-halant (setq glyph-str (concat glyph-str "\e$,4""\e(B"))) | |
602 | ;;; *** glyph-to-glyph conversion *** | |
;; Only the first match of `dev-glyph-glyph-regexp' is replaced.  The
;; second table is tried only when that replacement happened and the
;; syllable holds more than one consonant.
603 | (when (string-match dev-glyph-glyph-regexp glyph-str) | |
604 | (setq glyph-str | |
605 | (replace-match (gethash (match-string 0 glyph-str) | |
606 | dev-glyph-glyph-hash) | |
607 | nil t glyph-str)) | |
608 | (if (and (> cons-num 1) | |
609 | (string-match dev-glyph-glyph-2-regexp glyph-str)) | |
610 | (setq glyph-str | |
611 | (replace-match (gethash (match-string 0 glyph-str) | |
612 | dev-glyph-glyph-2-hash) | |
613 | nil t glyph-str)))) | |
614 | ;;; *** glyph reordering *** | |
;; Each iteration cuts off the prefix of glyph-str that ends at the
;; first halant glyph, or the whole remainder when no halant is left,
;; and sorts that group.  The loop stops once glyph-str is empty.
615 | (while (setq split-pos (string-match "\e$,4""\e(B\\|.$" glyph-str)) | |
616 | (setq glyph-block (substring glyph-str 0 (1+ split-pos))) | |
617 | (setq glyph-str (substring glyph-str (1+ split-pos))) | |
618 | (setq | |
619 | glyph-block | |
620 | (if (string-match dev-glyph-right-modifier-regexp glyph-block) | |
621 | (sort (string-to-list glyph-block) | |
622 | (function (lambda (x y) | |
623 | (< (get-char-code-property x 'composition-order) | |
624 | (get-char-code-property y 'composition-order))))) | |
;; Without a right modifier, glyphs of order 2 (the spacing glyphs in
;; `dev-glyph-order') are sorted after everything else.
625 | (sort (string-to-list glyph-block) | |
626 | (function (lambda (x y) | |
627 | (let ((xo (get-char-code-property x 'composition-order)) | |
628 | (yo (get-char-code-property y 'composition-order))) | |
629 | (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) | |
630 | (setq glyph-str-list (nconc glyph-str-list glyph-block))) | |
631 | ;; concatenate and attach reference-points. | |
;; Every glyph is preceded by its reference point; the `cdr' drops the
;; one in front of the first glyph.
632 | (setq glyph-str | |
633 | (cdr | |
634 | (apply | |
635 | 'nconc | |
636 | (mapcar | |
637 | (function (lambda (x) | |
638 | (list | |
639 | (or (get-char-code-property x 'reference-point) | |
640 | '(5 . 3) ;; default reference point. | |
641 | ) | |
642 | x))) | |
643 | glyph-str-list)))))) | |
77539682 | 644 | (compose-region from to glyph-str))) |
4ed46869 | 645 | |
650e8505 | 646 | (provide 'devan-util) |
02ed5b47 PJ |
647 | |
648 | ;;; devan-util.el ends here |