Commit | Line | Data |
---|---|---|
e8af40ee | 1 | ;;; devan-util.el --- support for Devanagari Script Composition |
4ed46869 | 2 | |
a604c5af | 3 | ;; Copyright (C) 1996, 2001 Free Software Foundation, Inc. |
4ed46869 KH |
4 | |
5 | ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
6 | ||
7 | ;; Keywords: multilingual, Indian, Devanagari | |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published by | |
13 | ;; the Free Software Foundation; either version 2, or (at your option) | |
14 | ;; any later version. | |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
e803d6bd KH |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
23 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;; History: | |
29 | ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
37cdc7ad KH |
30 | ;; 1997.3.24 fixed some bugs. |
31 | ||
32 | ;; Future work :: | |
33 | ;; Decompose the input characters and process them on the character basis. | |
4ed46869 KH |
34 | |
35 | ;; Devanagari script composition rules and related programs. | |
36 | ||
37 | ;;; Code: | |
38 | ||
39 | ;;; | |
40 | ;;; Steps toward composition of Devanagari Characters. | |
41 | ;;; | |
45717142 | 42 | |
4ed46869 KH |
43 | ;;; Basic functions. |
44 | ||
45 | ;;;###autoload | |
31ce7719 KH |
(defun indian-to-devanagari (char)
  "Map the IS 13194 character CHAR onto a Devanagari basic character.
The result is the `indian-2-column' character whose first position
code is #x21 and whose second position code is the IS 13194 code of
CHAR.  A CHAR outside the `indian-is13194' charset is returned as is."
  (let* ((parts (split-char char))
         (charset (car parts))
         (code (car (cdr parts))))
    (cond ((eq charset 'indian-is13194)
           (make-char 'indian-2-column ?\x21 code))
          (t char))))
4ed46869 KH |
53 | |
54 | ;;;###autoload | |
31ce7719 KH |
(defun devanagari-to-indian (char)
  "Map the Devanagari basic character CHAR onto an IS 13194 character.
CHAR counts as a Devanagari basic character when it belongs to the
`indian-2-column' charset and its first position code is #x21; its
second position code then becomes the IS 13194 code.  Any other CHAR
is returned as is."
  (let* ((parts (split-char char))
         (charset (car parts)))
    (cond ((not (eq charset 'indian-2-column)) char)
          ((= (nth 1 parts) ?\x21)
           (make-char 'indian-is13194 (nth 2 parts)))
          (t char))))
4ed46869 KH |
63 | |
64 | ;;;###autoload | |
(defun indian-to-devanagari-region (from to)
  "Replace IS 13194 characters between FROM and TO by Devanagari basic ones.
Interactively, operate on the region.  From a program, FROM and TO
are positions (integers or markers) delimiting the text to convert.
Characters of other charsets are skipped; point is preserved."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (while (< (point) to)
      (let ((ch (following-char)))
        (cond ((eq (char-charset ch) 'indian-is13194)
               ;; Swap the character in place; inserting the
               ;; replacement also moves point past it.
               (delete-char 1)
               (insert (indian-to-devanagari ch)))
              (t
               (forward-char 1)))))))
4ed46869 KH |
79 | |
80 | ;;;###autoload | |
(defun devanagari-to-indian-region (from to)
  "Replace Devanagari basic characters between FROM and TO by Indian ones.
Interactively, operate on the region.  From a program, FROM and TO
are positions (integers or markers) delimiting the text to convert.
Only characters of the `indian-2-column' charset are rewritten, each
through `devanagari-to-indian'; point is preserved."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (while (< (point) to)
      (let ((ch (following-char)))
        (cond ((eq (char-charset ch) 'indian-2-column)
               ;; Swap the character in place; inserting the
               ;; replacement also moves point past it.
               (delete-char 1)
               (insert (devanagari-to-indian ch)))
              (t
               (forward-char 1)))))))
4ed46869 KH |
95 | |
96 | ;;;###autoload | |
31ce7719 KH |
(defun indian-to-devanagari-string (string)
  "Return a copy of STRING with Indian characters made Devanagari basic.
Every character of STRING is passed through `indian-to-devanagari'
and the results are joined, in order, into a new string."
  (concat (mapcar (function indian-to-devanagari) string)))
4ed46869 KH |
106 | |
107 | ;; Phase 0 - Determine whether the characters can be composed. | |
108 | ;; | |
109 | ;;; | |
110 | ;;; Regular expressions to split characters for composition. | |
111 | ;;; | |
112 | ;; | |
113 | ;; Indian script word contains one or more syllables. | |
114 | ;; In BNF, it can be expressed as follows: | |
115 | ;; | |
116 | ;; Word ::= {Syllable} [Cons-Syllable] | |
117 | ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable | |
118 | ;; Vowel-Syllable ::= V[D] | |
119 | ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D] | |
bd09f27b | 120 | ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons |
4ed46869 KH |
121 | ;; Pure-Cons ::= Full-Cons H |
122 | ;; Full-Cons ::= C [N] | |
123 | ;; | |
124 | ;; {} repeat, [] optional | |
125 | ;; | |
126 | ;; C - Consonant (\e$(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E\e(B | |
127 | ;; \e$(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X\e(B) | |
128 | ;; N - Nukta (\e$(5!i\e(B) | |
bd09f27b KH |
129 | ;; H - Halant(\e$(5!h\e(B) or Virama |
130 | ;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*\e(B) | |
131 | ;; ("\e$(5#&#'#*\e(B" can be obtained by IS13194 vowels with nukta.) | |
37cdc7ad KH |
132 | ;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu (\e$(5!!!"\e(B) |
133 | ;; (Visaraga (\e$(5!#\e(B) is excluded.) | |
bd09f27b KH |
134 | ;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M\e(B) |
135 | ;; ("\e$(5#K#L#M\e(B" can be obtained by IS13194 matras with nukta.) | |
4ed46869 | 136 | ;; |
37cdc7ad KH |
137 | ;; In Emacs, one syllable of Indian language is considered to be one |
138 | ;; composite glyph. If we expand the above expression for | |
139 | ;; cons-vowel-syllable, it would be: | |
4ed46869 | 140 | ;; |
37cdc7ad | 141 | ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D] |
4ed46869 | 142 | ;; |
37cdc7ad | 143 | ;; Therefore, in worst case, the one syllable may contain |
4ed46869 KH |
144 | ;; following characters. |
145 | ;; | |
bd09f27b KH |
146 | ;; C N H C N H C N H C N H C N M D |
147 | ;; | |
37cdc7ad KH |
148 | ;; The example is a sanskrit word "kArtsnya", where five consecutive |
149 | ;; consonants appear. | |
4ed46869 | 150 | ;; |
37cdc7ad KH |
151 | ;; On the other hand, consonant-syllable, which appears at the end of |
152 | ;; the word, would have the following expression: | |
4ed46869 | 153 | ;; |
bd09f27b | 154 | ;; [C [N] H] [C [N] H] [C [N] H] C [N] H |
4ed46869 KH |
155 | ;; |
156 | ;; This is acceptable BEFORE proper consonant-syllable is input. The | |
157 | ;; string which doesn't match with the above expression is invalid and | |
158 | ;; thus must be fixed. | |
159 | ;; | |
160 | ;; Note: | |
907c83c8 | 161 | ;; A third case can be considered: a syllable that is already complete, |
4ed46869 KH |
162 | ;; so that no further code can be added to it. |
163 | ;; | |
bd09f27b | 164 | ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D |
4ed46869 KH |
165 | ;; |
166 | ;; However, to make editing possible even in this condition, we will | |
167 | ;; not consider this case. |
37cdc7ad KH |
168 | ;; |
169 | ;; Note: | |
170 | ;; Currently, it seems that only the following consonants can have a | |
171 | ;; Nukta sign attached. | |
172 | ;; (\e$(5!3!4!5!:!?!@!I\e(B) | |
173 | ;; Therefore, [\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B? can be re-written as | |
174 | ;; \\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X\e(B] | |
175 | ||
;; Full-Cons ::= C [N].  One group holding two alternatives: a consonant
;; from the explicit list followed by the nukta sign, or any single
;; character from the second bracket expression (the consonant range
;; plus three further characters).
176 | (defconst devanagari-full-cons | |
177 | "\\(\\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X$.$E"%\e(B]\\)" | |
178 | "Devanagari full consonant") | |
179 | ||
;; Pure-Cons ::= Full-Cons H.  A full consonant followed by the halant
;; sign, wrapped in its own group so that a "?" appended by the
;; syllable regexps applies to the whole unit.
180 | (defconst devanagari-pure-cons | |
181 | (concat "\\(" devanagari-full-cons "\e$(5!h\e(B\\)") | |
182 | "Devanagari pure consonant") | |
183 | ||
;; M in the grammar: either one of three listed matras followed by the
;; nukta sign, or a single character from the matra bracket expression.
184 | (defconst devanagari-matra | |
185 | "\\(\\([\e$(5!_![!\\e(B]\e$(5!i\e(B\\)\\|[\e$(5!Z\e(B-\e$(5!g#K#L#M\e(B]\\)" | |
186 | "Devanagari Matra Signs. '\e$(5#K#L#M\e(B' can also be created from the combination | |
187 | of '\e$(5!_![!\\e(B' and nukta sign.") | |
188 | ||
;; V in the grammar: either one of three listed vowels followed by the
;; nukta sign, or a single character from the vowel bracket expression.
189 | (defconst devanagari-vowel | |
190 | "\\(\\([\e$(5!*!&!'\e(B]\e$(5!i\e(B\\)\\|[\e$(5!$\e(B-\e$(5!2#&#'#*\e(B]\\)" | |
191 | "Devanagari Vowels. '\e$(5#&#'#*\e(B' can also be created from the combination | |
192 | of '\e$(5!*!&!'\e(B' and nukta sign.") | |
193 | ||
;; Vowel-Syllable ::= V [D].  A vowel optionally followed by one of the
;; two vowel modifiers listed in the bracket expression.
194 | (defconst devanagari-vowel-syllable | |
195 | (concat devanagari-vowel "[\e$(5!!!"\e(B]?") | |
196 | "Devanagari vowel syllable.") | |
197 | ||
;; Cons-Syllable: up to three optional pure consonants followed by one
;; mandatory pure consonant; the trailing "$" anchors the match at the
;; end of the text being examined.
198 | (defconst devanagari-cons-syllable | |
199 | (concat devanagari-pure-cons "?" devanagari-pure-cons "?" | |
200 | devanagari-pure-cons "?" devanagari-pure-cons "$") | |
201 | "Devanagari consonant syllable") | |
202 | ||
;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D].  The
;; optional leading group holds one to four pure consonants; it is
;; followed by a full consonant, an optional matra and an optional
;; vowel modifier.
203 | (defconst devanagari-cons-vowel-syllable | |
204 | (concat "\\(" | |
205 | devanagari-pure-cons "?" devanagari-pure-cons "?" | |
206 | devanagari-pure-cons "?" devanagari-pure-cons "\\)?" | |
207 | devanagari-full-cons devanagari-matra "?[\e$(5!!!"\e(B]?") | |
208 | "Devanagari consonant vowel syllable.") | |
4ed46869 KH |
209 | |
210 | ;; | |
211 | ;; Also, digits and virams should be processed separately from syllables. |
212 | ;; | |
213 | ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and | |
214 | ;; OM is obtained by Nukta after Chandrabindu | |
215 | ;; | |
37cdc7ad KH |
216 | |
;; Single characters that are handled outside the syllable grammar: one
;; character range plus two individual characters (the viram and the
;; visarga).  NOTE(review): judging by the constant's name the range
;; covers the digits -- confirm against the charset table.
217 | (defconst devanagari-digit-viram-visarga | |
bb6c9254 KH |
218 | "[\e$(5!q\e(B-\e$(5!z!j!#\e(B]") |
219 | ||
;; Signs that IS 13194 spells as a base character plus nukta (viram +
;; nukta for Avagrah, chandrabindu + nukta for OM), or the single
;; characters that the nukta rules of the glyph table turn them into.
37cdc7ad | 220 | (defconst devanagari-other-sign |
bd09f27b | 221 | "\\([\e$(5!!!j\e(B]\e$(5!i\e(B\\)\\|\\([\e$(5#!#J\e(B]\\)") |
4ed46869 | 222 | |
37cdc7ad KH |
;; Alternation of the five unit regexps -- consonant syllable, vowel
;; syllable, consonant-vowel syllable, other sign, digit/viram/visarga
;; -- each wrapped in its own group.
223 | (defconst devanagari-composite-glyph-unit |
224 | (concat "\\(" devanagari-cons-syllable | |
225 | "\\)\\|\\(" devanagari-vowel-syllable | |
37cdc7ad | 226 | "\\)\\|\\(" devanagari-cons-vowel-syllable |
bb6c9254 KH |
227 | "\\)\\|\\(" devanagari-other-sign |
228 | "\\)\\|\\(" devanagari-digit-viram-visarga "\\)") | |
4ed46869 KH |
229 | "Regexp matching to Devanagari string to be composed form one glyph.") |
230 | ||
231 | ;;(put-charset-property charset-devanagari-1-column | |
232 | ;; 'char-to-glyph 'devanagari-compose-string) | |
233 | ;;(put-charset-property charset-devanagari-2-column | |
234 | ;; 'char-to-glyph 'devanagari-compose-string) | |
235 | ||
236 | ;; Sample | |
237 | ;; | |
37cdc7ad KH |
238 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!X![\e(B") => 0 |
239 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!F!h!D!\\e(B") => 0 | |
240 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!X![!F!h!D!\\e(B") => 0 | |
4ed46869 KH |
241 | |
242 | ;; | |
243 | ;; Steps toward the composition | |
37cdc7ad | 244 | ;; Converting Character Codes to Composite Glyph. |
4ed46869 KH |
245 | ;; |
246 | ;; Example : \e$(5!X![\e(B/\e$(5!F!h!D!\\e(B | |
247 | ;; | |
248 | ;; First, convert Characters to appropriate glyphs. | |
249 | ;; | |
250 | ;; => \e$(5!X![\e(B/\e$(5"F!D!\\e(B | |
251 | ;; | |
252 | ;; Then, determine the base glyph, apply-orders and apply-rules. | |
253 | ;; | |
254 | ;; => \e$(5!X\e(B (ml.mr) \e$(5![\e(B / \e$(5!D\e(B (ml.mr) \e$(5"F\e(B (mr ml) \e$(5!\\e(B | |
255 | ;; | |
256 | ;; Finally, convert 2-column glyphs to 1-column glyph | |
257 | ;; if such a glyph exist. | |
258 | ;; | |
31ce7719 | 259 | ;; => \e$(6!X\e(B (ml.mr) \e$(6![\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B |
4ed46869 KH |
260 | ;; |
261 | ;; Compose the glyph. | |
262 | ;; | |
31ce7719 KH |
263 | ;; => \e4\e$(6!Xt%![\e0!X![\e1\e(B/\e4\e$(6!Dt%"Fv#!\\e0!D"F!\\e1\e(B |
264 | ;; => \e4\e$(6!Xt%![\e0!X![\e1\e4!Dt%"Fv#!\\e0!D"F!\\e1\e(B | |
4ed46869 KH |
265 | ;; |
266 | ||
267 | ;; | |
268 | ;; Phase 1: Converting Character Code to Glyph Code. | |
269 | ;; | |
270 | ;; | |
271 | ;; IMPORTANT: | |
31ce7719 | 272 | ;; There may be many rules that you may want to suppress. |
4ed46869 KH |
273 | ;; In that case, please comment out that rule. |
274 | ;; | |
275 | ;; RULES WILL BE EVALUATED FROM FIRST TO LAST. | |
276 | ;; PUT MORE SPECIFIC RULES FIRST. | |
277 | ;; | |
278 | ;; TO DO: | |
279 | ;; Prepare a separate, specific list of rules for each language | |
31ce7719 | 280 | ;; that adopts the Devanagari script. |
4ed46869 KH |
281 | ;; |
282 | ||
4ed46869 KH |
283 | (defconst devanagari-char-to-glyph-rules |
284 | '( | |
4ed46869 KH |
285 | |
286 | ;; `r' at the top of syllable and followed by other consonants. | |
37cdc7ad KH |
287 | ;; ("[^\e$(5!h\e(B]\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p\e(B") |
288 | ("^\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p\e(B") | |
4ed46869 KH |
289 | |
290 | ;; Ligature Rules | |
37cdc7ad KH |
291 | ("\\(\e$(5!3!h!B!h!O!h!M\e(B\\)" "\e$(5$!\e(B" sanskrit) |
292 | ("\\(\e$(5!3!h!B!h!T\e(B\\)" "\e$(5$"\e(B" sanskrit) | |
293 | ("\\(\e$(5!3!h!B!h!M\e(B\\)" "\e$(5$#\e(B" sanskrit) | |
294 | ("\\(\e$(5!3!h!F!h!M\e(B\\)" "\e$(5$$\e(B") | |
295 | ("\\(\e$(5!3!h!O!h!M\e(B\\)" "\e$(5$%\e(B") | |
296 | ("\\(\e$(5!3!h!O\e(B\\)" "\e$(5"#\e(B") ; Post "r" | |
297 | ("\\(\e$(5!3!h!T!h!M\e(B\\)" "\e$(5$&\e(B" sanskrit) | |
298 | ("\\(\e$(5!3!h\e(B\\)\e$(5!3!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"3\e(B") ; Special Half Form | |
299 | ("\\(\e$(5!3!h!3\e(B\\)" "\e$(5$'\e(B") | |
300 | ("\\(\e$(5!3!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"3\e(B") ; Special Rules for "k-tr" | |
301 | ("\\(\e$(5!3!h!B\e(B\\)" "\e$(5$(\e(B") | |
302 | ("\\(\e$(5!3!h!F\e(B\\)" "\e$(5$)\e(B") | |
303 | ("\\(\e$(5!3!h!L\e(B\\)" "\e$(5$*\e(B") | |
304 | ("\\(\e$(5!3!h!M\e(B\\)" "\e$(5$+\e(B") | |
305 | ("\\(\e$(5!3!h!Q\e(B\\)" "\e$(5$,\e(B") | |
306 | ("\\(\e$(5!3!h!T\e(B\\)" "\e$(5$-\e(B") | |
307 | ("\\(\e$(5!3!h!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"l\e(B") ; Half Form | |
308 | ("\\(\e$(5$.!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"l\e(B") ; Half Form | |
309 | ("\\(\e$(5!3!h!V\e(B\\)" "\e$(5$.\e(B") | |
310 | ("\\(\e$(5!3!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"3\e(B") ; Half Form | |
311 | ("\\(\e$(5!3!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"s\e(B") ; Nukta Half Form | |
312 | ("\\(\e$(5!3!i\e(B\\)" "\e$(5#3\e(B") ; Nukta | |
313 | ("\\(\e$(5!4!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"4\e(B") ; Half Form | |
314 | ("\\(\e$(5!4!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"t\e(B") ; Nukta Half Form | |
315 | ("\\(\e$(5!4!i\e(B\\)" "\e$(5#4\e(B") ; Nukta | |
316 | ("\\(\e$(5!5!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"`\e(B") ; Half Form | |
317 | ("\\(\e$(5!5!h!O\e(B\\)" "\e$(5"$\e(B") ; Post "r" | |
318 | ("\\(\e$(5!5!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"5\e(B") ; Half Form | |
319 | ("\\(\e$(5!5!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"u\e(B") ; Nukta Half Form | |
320 | ("\\(\e$(5!5!i\e(B\\)" "\e$(5#5\e(B") ; Nukta | |
321 | ("\\(\e$(5!6!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"a\e(B") ; Half Form | |
322 | ("\\(\e$(5!6!h!F\e(B\\)" "\e$(5$/\e(B") | |
323 | ; Slot | |
324 | ("\\(\e$(5!6!h!O\e(B\\)" "\e$(5!6"q\e(B") ; Post "r" | |
325 | ("\\(\e$(5!6!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"6\e(B") ; Half Form | |
326 | ("\\(\e$(5!7!h!3!h!B!h!M\e(B\\)" "\e$(5$0\e(B" sanskrit) | |
327 | ("\\(\e$(5!7!h!3!h!V!h!T\e(B\\)" "\e$(5$1\e(B" sanskrit) | |
328 | ("\\(\e$(5!7!h!3!h!B\e(B\\)" "\e$(5$2\e(B" sanskrit) | |
329 | ("\\(\e$(5!7!h!3!h!V\e(B\\)" "\e$(5$3\e(B" sanskrit) | |
330 | ("\\(\e$(5!7!h!3!h!O\e(B\\)" "\e$(5$9"q\e(B") ; Special Rule. May be precomposed font needed. | |
331 | ("\\(\e$(5!7!h!6!h!O\e(B\\)" "\e$(5$4\e(B" sanskrit) | |
332 | ("\\(\e$(5!7!h!3!h!M\e(B\\)" "\e$(5$5\e(B" sanskrit) | |
333 | ("\\(\e$(5!7!h!4!h!M\e(B\\)" "\e$(5$6\e(B" sanskrit) | |
334 | ("\\(\e$(5!7!h!5!h!M\e(B\\)" "\e$(5$7\e(B" sanskrit) | |
335 | ("\\(\e$(5!7!h!6!h!M\e(B\\)" "\e$(5$8\e(B" sanskrit) | |
336 | ("\\(\e$(5!7!h!3\e(B\\)" "\e$(5$9\e(B") | |
337 | ("\\(\e$(5!7!h!4\e(B\\)" "\e$(5$:\e(B") | |
338 | ("\\(\e$(5!7!h!5!h!O\e(B\\)" "\e$(5$;"q\e(B") ; Special Rule. May be precomposed font needed. | |
339 | ("\\(\e$(5!7!h!5\e(B\\)" "\e$(5$;\e(B") | |
340 | ("\\(\e$(5!7!h!6\e(B\\)" "\e$(5$<\e(B") | |
341 | ("\\(\e$(5!7!h!7\e(B\\)" "\e$(5$=\e(B") | |
342 | ("\\(\e$(5!7!h!F\e(B\\)" "\e$(5$>\e(B") | |
343 | ("\\(\e$(5!7!h!L\e(B\\)" "\e$(5$?\e(B") | |
344 | ("\\(\e$(5!7!h!M\e(B\\)" "\e$(5$@\e(B") | |
345 | ("\\(\e$(5!8!h\e(B\\)[\e$(5!8!<\e(B]\e$(5!h\e(B" "\e$(5"8\e(B") ; Half Form | |
346 | ("\\(\e$(5!8!h!8\e(B\\)" "\e$(5$A\e(B") | |
347 | ("\\(\e$(5!8!h!<\e(B\\)" "\e$(5$B\e(B") | |
348 | ("\\(\e$(5!8!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"8"q\e(B") ; Half Form Post "r" | |
349 | ("\\(\e$(5!8!h!O\e(B\\)" "\e$(5!8"q\e(B") ; Post "r" | |
350 | ("\\(\e$(5!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"8\e(B") ; Half Form | |
351 | ("\\(\e$(5!9!h!M\e(B\\)" "\e$(5$C\e(B") | |
352 | ("\\(\e$(5!:!h!O\e(B\\)" "\e$(5$D\e(B") | |
353 | ("\\(\e$(5!:!h!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"m\e(B") ; Half Form | |
354 | ("\\(\e$(5!:!h!<\e(B\\)" "\e$(5$E\e(B") | |
355 | ("\\(\e$(5!:!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5":\e(B") ; Half Form | |
356 | ("\\(\e$(5!:!i!h!O\e(B\\)" "\e$(5"!\e(B") ; Nukta Post "r" | |
357 | ("\\(\e$(5!:!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"z\e(B") ; Nukta Half Form | |
358 | ("\\(\e$(5!:!i\e(B\\)" "\e$(5#:\e(B") ; Nukta | |
359 | ("\\(\e$(5!;!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5";\e(B") ; Half Form | |
360 | ("\\(\e$(5!<!h\e(B\\)\e$(5!8!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Special Half Form | |
361 | ("\\(\e$(5!<!h!8\e(B\\)" "\e$(5$F\e(B") | |
362 | ("\\(\e$(5!<!h\e(B\\)\e$(5!:!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Special Half Form | |
363 | ("\\(\e$(5!<!h!:\e(B\\)" "\e$(5$G\e(B") | |
364 | ("\\(\e$(5!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Half Form | |
365 | ("\\(\e$(5!=!h!3\e(B\\)" "\e$(5$H\e(B") | |
366 | ("\\(\e$(5!=!h!=\e(B\\)" "\e$(5$I\e(B") | |
367 | ("\\(\e$(5!=!h!>\e(B\\)" "\e$(5$J\e(B") | |
368 | ("\\(\e$(5!=!h!M\e(B\\)" "\e$(5$K\e(B") | |
369 | ("\\(\e$(5!>!h!M\e(B\\)" "\e$(5$L\e(B") | |
370 | ("\\(\e$(5!?!h!5!h!M\e(B\\)" "\e$(5$M\e(B" sanskrit) | |
371 | ("\\(\e$(5!?!h!6!h!O\e(B\\)" "\e$(5$N\e(B" sanskrit) | |
372 | ("\\(\e$(5!?!h!O!h!M\e(B\\)" "\e$(5$O\e(B") | |
373 | ("\\(\e$(5!?!h!5\e(B\\)" "\e$(5$P\e(B") | |
374 | ("\\(\e$(5!?!h!6\e(B\\)" "\e$(5$Q\e(B") | |
375 | ("\\(\e$(5!?!h!?\e(B\\)" "\e$(5$R\e(B") | |
376 | ("\\(\e$(5!?!h!L\e(B\\)" "\e$(5$S\e(B") | |
377 | ("\\(\e$(5!?!h!M\e(B\\)" "\e$(5$T\e(B") | |
378 | ("\\(\e$(5!?!i\e(B\\)" "\e$(5#?\e(B") ; Nukta | |
379 | ("\\(\e$(5!@!h!M\e(B\\)" "\e$(5$`\e(B") | |
380 | ("\\(\e$(5!@!i\e(B\\)" "\e$(5#@\e(B") ; Nukta | |
381 | ("\\(\e$(5!A!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"A\e(B") ; Half Form | |
382 | ("\\(\e$(5!B!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"B\e(B") ; Special Rule for "t-tr" | |
383 | ("\\(\e$(5!B!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"c\e(B") ; Half Form | |
384 | ("\\(\e$(5!B!h!B\e(B\\)" "\e$(5$a\e(B") | |
385 | ("\\(\e$(5!B!h!F\e(B\\)" "\e$(5$b\e(B") | |
386 | ("\\(\e$(5!B!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"d\e(B") ; Half Form Post "r" | |
387 | ("\\(\e$(5!B!h!O\e(B\\)" "\e$(5"%\e(B") ; Post "r" | |
388 | ("\\(\e$(5!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"B\e(B") ; Half Form | |
389 | ("\\(\e$(5!C!h!O\e(B\\)" "\e$(5!C"q\e(B") ; Post "r" | |
390 | ("\\(\e$(5!C!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"C\e(B") ; Half Form | |
391 | ("\\(\e$(5!D!h!D!h!M\e(B\\)" "\e$(5$c\e(B") | |
392 | ("\\(\e$(5!D!h!E!h!M\e(B\\)" "\e$(5$d\e(B") | |
393 | ("\\(\e$(5!D!h!K!h!M\e(B\\)" "\e$(5$e\e(B") | |
394 | ("\\(\e$(5!D!h!K!h!O\e(B\\)" "\e$(5$r"r\e(B") ; Special Case for "dbhr" ; *** | |
395 | ("\\(\e$(5!D!h!O!h!M\e(B\\)" "\e$(5$f\e(B") | |
396 | ("\\(\e$(5!D!h!T!h!M\e(B\\)" "\e$(5$g\e(B") | |
397 | ("\\(\e$(5!D!h!5!h!O\e(B\\)" "\e$(5$h\e(B") | |
398 | ("\\(\e$(5!D!h!6!h!O\e(B\\)" "\e$(5$i\e(B") | |
399 | ("\\(\e$(5!D!h!D!h!T\e(B\\)" "\e$(5$j\e(B") | |
400 | ("\\(\e$(5!D!h!E!h!T\e(B\\)" "\e$(5$k\e(B") | |
401 | ("\\(\e$(5!D!h\e(B\\)\e$(5!E!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5!D!h\e(B") ; Special Half Form (for ddhra) | |
402 | ("\\(\e$(5!D!h!5\e(B\\)" "\e$(5$l\e(B") | |
403 | ("\\(\e$(5!D!h!6\e(B\\)" "\e$(5$m\e(B") | |
404 | ("\\(\e$(5!D!h!D\e(B\\)" "\e$(5$n\e(B") | |
405 | ("\\(\e$(5!D!h!E\e(B\\)" "\e$(5$o\e(B") | |
406 | ("\\(\e$(5!D!h!F\e(B\\)" "\e$(5$p\e(B") | |
407 | ("\\(\e$(5!D!h\e(B\\)\e$(5!J!h\e(B" "\e$(5!D!h\e(B") ; Suppressing "db-" | |
408 | ("\\(\e$(5!D!h!J\e(B\\)" "\e$(5$q\e(B") | |
409 | ("\\(\e$(5!D!h!K\e(B\\)" "\e$(5$r\e(B") | |
410 | ("\\(\e$(5!D!h!L\e(B\\)" "\e$(5$s\e(B") | |
411 | ("\\(\e$(5!D!h!M\e(B\\)" "\e$(5$t\e(B") | |
412 | ("\\(\e$(5!D!h!T\e(B\\)" "\e$(5$u\e(B") | |
413 | ("\\(\e$(5!E!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"e\e(B") ; Half Form | |
414 | ("\\(\e$(5!E!h!F\e(B\\)" "\e$(5$v\e(B") | |
415 | ("\\(\e$(5!E!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"f\e(B") ; Half Form Post "r" | |
416 | ("\\(\e$(5!E!h!O\e(B\\)" "\e$(5!E"q\e(B") ; Post "r" | |
417 | ("\\(\e$(5!E!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"E\e(B") ; Half Form | |
418 | ("\\(\e$(5!F!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"k\e(B") ; Half Form | |
419 | ("\\(\e$(5!F!h!F\e(B\\)" "\e$(5$w\e(B") | |
420 | ("\\(\e$(5!F!h!O\e(B\\)" "\e$(5!F"q\e(B") | |
421 | ("\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"F\e(B") ; Half Form | |
422 | ("\\(\e$(5!G!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"G\e(B") ; Nukta Half Form | |
423 | ("\\(\e$(5!H!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"H\e(B") ; Special Rule for "p-tr" | |
424 | ("\\(\e$(5!H!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"g\e(B") ; Half Form | |
425 | ("\\(\e$(5!H!h!B\e(B\\)" "\e$(5$x\e(B") | |
426 | ("\\(\e$(5!H!h!F\e(B\\)" "\e$(5$y\e(B") | |
427 | ("\\(\e$(5!H!h!Q\e(B\\)" "\e$(5$z\e(B") | |
428 | ("\\(\e$(5!H!h!O\e(B\\)" "\e$(5"&\e(B") ; Post "r" | |
429 | ("\\(\e$(5!H!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"H\e(B") ; Half Form | |
430 | ("\\(\e$(5!I!h!O\e(B\\)" "\e$(5"'\e(B") ; Post "r" | |
431 | ("\\(\e$(5!I!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"I\e(B") ; Half Form | |
432 | ("\\(\e$(5!I!i!h!O\e(B\\)" "\e$(5""\e(B") ; Nukta Post "r" | |
433 | ("\\(\e$(5!I!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"y\e(B") ; Nukta Half Form | |
434 | ("\\(\e$(5!I!i\e(B\\)" "\e$(5#I\e(B") ; Nukta | |
435 | ("\\(\e$(5!J!h\e(B\\)\e$(5!F!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
436 | ("\\(\e$(5!J!h!F\e(B\\)" "\e$(5${\e(B") | |
437 | ("\\(\e$(5!J!h\e(B\\)\e$(5!J!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
438 | ("\\(\e$(5!J!h!J\e(B\\)" "\e$(5$|\e(B") | |
439 | ("\\(\e$(5!J!h\e(B\\)\e$(5!T!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
440 | ("\\(\e$(5!J!h!T\e(B\\)" "\e$(5$}\e(B") | |
441 | ("\\(\e$(5!J!h!O\e(B\\)" "\e$(5!J"q\e(B") ; Post "r" | |
442 | ("\\(\e$(5!J!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Half Form | |
443 | ("\\(\e$(5!K!h!F\e(B\\)" "\e$(5$~\e(B") | |
444 | ("\\(\e$(5!K!h!O\e(B\\)" "\e$(5!K"q\e(B") ; Post "r" | |
445 | ("\\(\e$(5!K!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"K\e(B") ; Half Form | |
446 | ("\\(\e$(5!L!h!F\e(B\\)" "\e$(5#P\e(B") | |
447 | ("\\(\e$(5!L!h!Q\e(B\\)" "\e$(5#Q\e(B") | |
448 | ("\\(\e$(5!L!h!O\e(B\\)" "\e$(5!L"q\e(B") ; Post "r" | |
449 | ("\\(\e$(5!L!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"L\e(B") ; Half Form | |
450 | ("\\(\e$(5!M!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"M\e(B") ; Half Form | |
451 | ("\\(\e$(5!N!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"N\e(B") ; Half Form | |
452 | ;; special form for "ru". | |
453 | ("\\(\e$(5!O!]\e(B\\)" "\e$(5",\e(B") | |
454 | ("\\(\e$(5!O!^\e(B\\)" "\e$(5"-\e(B") | |
455 | ("\\(\e$(5!P!]\e(B\\)" "\e$(5".\e(B") | |
456 | ("\\(\e$(5!P!^\e(B\\)" "\e$(5"/\e(B") | |
4ed46869 | 457 | ;; |
37cdc7ad KH |
458 | ("\\(\e$(5!Q!h!Q\e(B\\)" "\e$(5#`\e(B" sanskrit) |
459 | ("\\(\e$(5!Q!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"Q\e(B") ; Half Form | |
460 | ("\\(\e$(5!R!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"R\e(B") ; Half Form | |
461 | ("\\(\e$(5!S!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"S\e(B") ; Half Form | |
462 | ("\\(\e$(5!T!h!F\e(B\\)" "\e$(5#a\e(B") | |
463 | ("\\(\e$(5!T!h!T\e(B\\)" "\e$(5#b\e(B") | |
464 | ("\\(\e$(5!T!h!O\e(B\\)" "\e$(5!T"q\e(B") ; Post "r" | |
465 | ("\\(\e$(5!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"T\e(B") ; Half Form | |
466 | ("\\(\e$(5!U!h!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"h\e(B") ; Half Form | |
467 | ("\\(\e$(5!U!h!8\e(B\\)" "\e$(5#c\e(B") | |
468 | ("\\(\e$(5!U!h!F\e(B\\)" "\e$(5#d\e(B") | |
469 | ("\\(\e$(5!U!h!J\e(B\\)" "\e$(5#e\e(B") | |
470 | ("\\(\e$(5!U!h!Q\e(B\\)" "\e$(5#f\e(B") | |
471 | ("\\(\e$(5!U!h\e(B\\)\e$(5!T!h!O\e(B" "\e$(5"U\e(B") ; Special Half Form | |
472 | ("\\(\e$(5!U!h!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"j\e(B") ; Half Form | |
473 | ; ("\\(\e$(5!U!h!T\e(B\\)" "\e$(5#g\e(B") | |
474 | ("\\(\e$(5!U!h!O!h!T\e(B\\)" "\e$(5#g\e(B") | |
475 | ("\\(\e$(5!U!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"i\e(B") ; Half Form | |
476 | ("\\(\e$(5!U!h!O\e(B\\)" "\e$(5")\e(B") ; Post "r" | |
477 | ("\\(\e$(5!U!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"U\e(B") ; Half Form | |
478 | ("\\(\e$(5!V!h!=!h!O!h!M\e(B\\)" "\e$(5#h\e(B") | |
479 | ("\\(\e$(5!V!h!=!h!M\e(B\\)" "\e$(5#i\e(B") | |
480 | ("\\(\e$(5!V!h!=!h!T\e(B\\)" "\e$(5#j\e(B") | |
481 | ("\\(\e$(5!V!h!=\e(B\\)" "\e$(5#k\e(B") | |
482 | ("\\(\e$(5!V!h!>\e(B\\)" "\e$(5#l\e(B") | |
483 | ("\\(\e$(5!V!h!O\e(B\\)" "\e$(5!V"q\e(B") ; Post "r" | |
484 | ("\\(\e$(5!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"V\e(B") ; Half Form | |
485 | ("\\(\e$(5!W!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"W"F\e(B") ; Special Half Form | |
486 | ("\\(\e$(5!W!h!F\e(B\\)" "\e$(5#m\e(B") | |
487 | ("\\(\e$(5!W!h!O\e(B\\)" "\e$(5#n\e(B") | |
488 | ("\\(\e$(5!W!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"W\e(B") ; Half Form | |
489 | ("\\(\e$(5!X!h!A\e(B\\)" "\e$(5#p\e(B") | |
490 | ("\\(\e$(5!X!h!F\e(B\\)" "\e$(5#q\e(B") | |
491 | ("\\(\e$(5!X!h!L\e(B\\)" "\e$(5#r\e(B") | |
492 | ("\\(\e$(5!X!h!M\e(B\\)" "\e$(5#s\e(B") | |
493 | ("\\(\e$(5!X!h!O\e(B\\)" "\e$(5#t\e(B") | |
494 | ("\\(\e$(5!X!h!Q\e(B\\)" "\e$(5#u\e(B") | |
495 | ("\\(\e$(5!X!h!T\e(B\\)" "\e$(5#v\e(B") | |
496 | ;; Special Ligature Rules | |
497 | ("\\(\e$(5!X!_\e(B\\)" "\e$(5#R\e(B") | |
4ed46869 KH |
498 | |
499 | ;; For consonants other than listed above, glyph-composition will | |
500 | ;; be applied. If the consonant which is preceding "\e$(5!O\e(B" does not | |
501 | ;; have the vertical line (such as "\e$(5!?\e(B"), "\e$(5"r\e(B" is put beneath the | |
502 | ;; consonant. | |
503 | ;; | |
37cdc7ad KH |
504 | ("[\e$(5!7!9!=!>!?!@!D!O!P!R!S!X\e(B]\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") |
505 | ("[\e$(5!6!8!C!E!F!H!J!K!L!M!T!V\e(B]\\(\e$(5!h!O\e(B\\)" "\e$(5"q\e(B") | |
506 | ("\e$(5!?!i\e(B\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") | |
507 | ("\e$(5!@!i\e(B\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") | |
508 | ||
509 | ;; Nukta with Non-Consonants | |
510 | ("\\(\e$(5!!!i\e(B\\)" "\e$(5#!\e(B") | |
511 | ("\\(\e$(5!&!i\e(B\\)" "\e$(5#&\e(B") | |
512 | ("\\(\e$(5!'!i\e(B\\)" "\e$(5#'\e(B") | |
513 | ("\\(\e$(5!*!i\e(B\\)" "\e$(5#*\e(B") | |
514 | ("\\(\e$(5![!i\e(B\\)" "\e$(5#L\e(B") | |
515 | ("\\(\e$(5!\!i\e(B\\)" "\e$(5#M\e(B") | |
516 | ("\\(\e$(5!_!i\e(B\\)" "\e$(5#K\e(B") | |
517 | ("\\(\e$(5!j!i\e(B\\)" "\e$(5#J\e(B") | |
518 | ||
519 | ;; Special rule for "r + some vowels" | |
bb6c9254 KH |
520 | ("\\(\e$(5!O!_!i\e(B\\)" "\e$(5#*"p\e(B") |
521 | ("\\(\e$(5!O![!i\e(B\\)" "\e$(5#&"p\e(B") | |
522 | ("\\(\e$(5!O!\!i\e(B\\)" "\e$(5#'"p\e(B") | |
37cdc7ad | 523 | ("\\(\e$(5!O!_\e(B\\)" "\e$(5!*"p\e(B") |
bd09f27b | 524 | ;; If everything fails, "y" will connect to the front consonant. |
37cdc7ad | 525 | ("\\(\e$(5!h!M\e(B\\)" "\e$(5"]\e(B") |
4ed46869 KH |
526 | ) |
527 | "Alist of regexps of Devanagari character sequences vs composed characters.") | |
528 | ||
4ed46869 KH |
;; Index `devanagari-char-to-glyph-rules' by character.  Each rule is
;; filed under the first character of its \\(...\\) subject (property
;; `char-to-glyph'); the reverse mapping is filed under the first
;; character of the rule's output string (property `glyph-to-char').
(let ((remaining devanagari-char-to-glyph-rules))
  (while remaining
    (let* ((entry (car remaining))
           (pattern (car entry))
           (outputs (cdr entry))
           ;; Text between the outermost \\( and \\) of the rule's regexp.
           (subject (progn
                      (string-match "\\\\(\\(.+\\)\\\\)" pattern)
                      (substring pattern
                                 (match-beginning 1) (match-end 1))))
           (lead-char (string-to-char subject))
           (lead-glyph (string-to-char (car outputs))))
      (setq remaining (cdr remaining))
      ;; Append instead of pushing: rules must stay in table order
      ;; because earlier rules take priority.
      (put-char-code-property
       lead-char 'char-to-glyph
       (append (get-char-code-property lead-char 'char-to-glyph)
               (list entry)))
      ;; Record the reverse mapping for glyphs only, and only once per
      ;; glyph.  A glyph may in general stand for several character
      ;; sequences (e.g. surrounding vowels in Tamil), but for
      ;; Devanagari one glyph is kept tied to one character sequence.
      (if (and (< ?\e(5z\e(B lead-glyph)
               (null (get-char-code-property lead-glyph 'glyph-to-char)))
          ;; The property is known to be empty here, so the new value
          ;; is a one-element list.
          (put-char-code-property
           lead-glyph 'glyph-to-char
           (list (list (car outputs) subject)))))))
4ed46869 KH |
555 | |
556 | ;; | |
37cdc7ad KH |
557 | ;; Function used in both characters-to-glyphs conversion and |
558 | ;; glyphs-to-characters conversion. | |
4ed46869 KH |
559 | ;; |
560 | ||
31ce7719 KH |
(defun max-match-len (regexp)
  "Return the maximum length of text that can match the pattern REGEXP.
Only the [...] pattern of regexp is recognized: each bracket
expression counts as one character, and every other character of
REGEXP counts as one literal character.  Consequently operators and
anchors such as ^ are counted as one character each, which can only
over-estimate the length."
  (let ((len 0)
        (index 0))
    (while (string-match "\\[\\([^\]]\\)+\\]" regexp index)
      ;; Literals in front of this bracket expression, plus one for the
      ;; bracket expression itself.
      (setq len (+ len (- (match-beginning 0) index) 1)
            index (match-end 0)))
    ;; Literals after the last bracket expression (or the whole REGEXP
    ;; when it has none) count as well.  Leaving them out made a purely
    ;; literal context report length 0, so rules whose look-behind
    ;; context is literal text could never be matched by the caller.
    (+ len (- (length regexp) index))))
570 | ||
571 | ;; Return t iff at least one member appears in both LIST1 and LIST2. | |
(defun intersecting-p (list1 list2)
  "Return t if some element of LIST1 is also an element of LIST2.
Membership is tested with `memq'.  Return nil otherwise, in
particular when either list is empty."
  (let ((rest list1))
    ;; Drop leading elements of LIST1 until one is found in LIST2.
    (while (and rest (not (memq (car rest) list2)))
      (setq rest (cdr rest)))
    (if rest t nil)))
579 | ||
31ce7719 KH |
(defun string-conversion-by-rule (source symbol &rest specs)
  "Rewrite SOURCE using the rule lists kept in each character's SYMBOL property.
SPECS, the remaining arguments, select which restricted rules may be
applied.

A rule list has the form ((REGEXP STR RULE-SPEC ...) ...).  At every
position of SOURCE the rules stored on the character found there are
tried in list order, so more specific rules must come first.  The
first rule whose subject begins exactly at that position replaces the
subject by STR, and scanning resumes after the subject.  When no rule
applies, the character is copied unchanged.

A rule without RULE-SPECs is always eligible.  A rule with RULE-SPECs
is eligible only when at least one of them is a member of SPECS.

When REGEXP has the form '...\\(...\\)...', the text matched inside
the parentheses is the subject and the text before it is context that
must precede the current position.  Otherwise the whole REGEXP is the
subject."
  (let ((result "")
        (idx 0))
    (while (< idx (length source))
      (let ((candidates (get-char-code-property
                         (string-to-char (substring source idx)) symbol))
            (applied nil))
        (while candidates
          (let* ((entry (car candidates))
                 (pattern (car entry))
                 (replacement (nth 1 entry))
                 (entry-specs (nthcdr 2 entry))
                 (start idx))
            ;; Move on to the next rule by default; a successful rule
            ;; empties the list below to leave the loop.
            (setq candidates (cdr candidates))
            (cond
             ((and entry-specs (not (intersecting-p specs entry-specs)))
              ;; Restricted rule that SPECS does not enable: skip it.
              nil)
             (t
              (if (string-match "\\\\(.+\\\\)" pattern)
                  ;; The rule has leading context: begin the search far
                  ;; enough to the left for that context to fit.
                  (let ((prefix-start (string-match "^[^\\\\]*" pattern)))
                    (setq start
                          (- idx (max-match-len
                                  (substring pattern
                                             prefix-start (match-end 0))))))
                ;; No explicit subject: the whole pattern is the subject.
                (setq pattern (concat "\\(" pattern "\\)")))
              (if (< start 0) (setq start 0))
              (if (and (string-match pattern source start)
                       (= (match-beginning 1) idx))
                  (setq result (concat result replacement)
                        applied t
                        candidates nil
                        ;; Skip over the replaced characters.
                        idx (match-end 1)))))))
        ;; No rule applied here: copy one character and advance.
        (if (not applied)
            (setq result (concat result (substring source idx (1+ idx)))
                  idx (1+ idx)))))
    result))
639 | ||
37cdc7ad KH |
640 | |
641 | ;; | |
642 | ;; Convert Character Code to Glyph Code | |
643 | ;; | |
644 | ||
645 | ;;;###autoload | |
31ce7719 KH |
(defun char-to-glyph-devanagari (string &rest langs)
  "Convert Devanagari characters in STRING to Devanagari glyphs.
Ligatures and special rules are processed.
LANGS is handed to `string-conversion-by-rule' as the rule
specifications that select which `char-to-glyph' rules are applied."
  ;; `apply' spreads LANGS as the trailing arguments.
  (apply 'string-conversion-by-rule string 'char-to-glyph langs))
37cdc7ad | 652 | |
4ed46869 KH |
653 | ;; Example: |
654 | ;;(char-to-glyph-devanagari "\e$(5!X![!F!h!D!\\e(B") => "\e$(5!X!["F!D!\\e(B" | |
655 | ;;(char-to-glyph-devanagari "\e$(5!O!Z!V!h!=!h!O![!M\e(B") => ??? | |
656 | ||
657 | ;; | |
658 | ;; Phase 2: Compose Glyphs to form One Glyph. | |
659 | ;; | |
660 | ||
31ce7719 | 661 | ;; Each list consists of glyph, application-priority and application-direction. |
4ed46869 KH |
662 | ;; |
663 | ;; Glyphs will be ordered from low priority number to high priority number. | |
664 | ;; If application-priority is omitted, it is assumed to be 0. | |
665 | ;; If application-direction is omitted, it is assumed to be '(mr . ml). | |
666 | ||
667 | (defconst devanagari-composition-rules | |
37cdc7ad KH |
668 | '((?\e$(5!!\e(B 0 (tr . br)) |
669 | (?\e$(5!"\e(B 0 (mr . mr)) | |
670 | (?\e$(5!#\e(B 0) | |
4ed46869 KH |
671 | (?\e$(5!$\e(B 0) |
672 | (?\e$(5!%\e(B 0) | |
673 | (?\e$(5!&\e(B 0) | |
674 | (?\e$(5!'\e(B 0) | |
675 | (?\e$(5!(\e(B 0) | |
676 | (?\e$(5!)\e(B 0) | |
677 | (?\e$(5!*\e(B 0) | |
678 | (?\e$(5!+\e(B 0) | |
679 | (?\e$(5!,\e(B 0) | |
680 | (?\e$(5!-\e(B 0) | |
681 | (?\e$(5!.\e(B 0) | |
682 | (?\e$(5!/\e(B 0) | |
683 | (?\e$(5!0\e(B 0) | |
684 | (?\e$(5!1\e(B 0) | |
685 | (?\e$(5!2\e(B 0) | |
686 | (?\e$(5!3\e(B 0) | |
687 | (?\e$(5!4\e(B 0) | |
688 | (?\e$(5!5\e(B 0) | |
689 | (?\e$(5!6\e(B 0) | |
690 | (?\e$(5!7\e(B 0) | |
691 | (?\e$(5!8\e(B 0) | |
692 | (?\e$(5!9\e(B 0) | |
693 | (?\e$(5!:\e(B 0) | |
694 | (?\e$(5!;\e(B 0) | |
695 | (?\e$(5!<\e(B 0) | |
696 | (?\e$(5!=\e(B 0) | |
697 | (?\e$(5!>\e(B 0) | |
698 | (?\e$(5!?\e(B 0) | |
699 | (?\e$(5!@\e(B 0) | |
700 | (?\e$(5!A\e(B 0) | |
701 | (?\e$(5!B\e(B 0) | |
702 | (?\e$(5!C\e(B 0) | |
703 | (?\e$(5!D\e(B 0) | |
704 | (?\e$(5!E\e(B 0) | |
705 | (?\e$(5!F\e(B 0) | |
706 | (?\e$(5!G\e(B 0) | |
707 | (?\e$(5!H\e(B 0) | |
708 | (?\e$(5!I\e(B 0) | |
709 | (?\e$(5!J\e(B 0) | |
710 | (?\e$(5!K\e(B 0) | |
711 | (?\e$(5!L\e(B 0) | |
712 | (?\e$(5!M\e(B 0) | |
713 | (?\e$(5!N\e(B 0) | |
714 | (?\e$(5!O\e(B 0) | |
715 | (?\e$(5!P\e(B 0) | |
716 | (?\e$(5!Q\e(B 0) | |
717 | (?\e$(5!R\e(B 0) | |
718 | (?\e$(5!S\e(B 0) | |
719 | (?\e$(5!T\e(B 0) | |
720 | (?\e$(5!U\e(B 0) | |
721 | (?\e$(5!V\e(B 0) | |
722 | (?\e$(5!W\e(B 0) | |
723 | (?\e$(5!X\e(B 0) | |
724 | (?\e$(5!Y\e(B 0) | |
37cdc7ad KH |
725 | (?\e$(5!Z\e(B 0) |
726 | (?\e$(5![\e(B 0 (ml . mr)) | |
727 | (?\e$(5!\\e(B 0) | |
728 | (?\e$(5!]\e(B 0 (br . tr)) | |
729 | (?\e$(5!^\e(B 0 (br . tr)) | |
730 | (?\e$(5!_\e(B 0 (br . tr)) | |
731 | (?\e$(5!`\e(B 0 (mr . mr)) ; (tc . bc) | |
732 | (?\e$(5!a\e(B 0 (mr . mr)) | |
733 | (?\e$(5!b\e(B 0 (mr . mr)) | |
734 | (?\e$(5!c\e(B 0 (mr . mr)) | |
735 | (?\e$(5!d\e(B 0) | |
736 | (?\e$(5!e\e(B 0) | |
737 | (?\e$(5!f\e(B 0) | |
738 | (?\e$(5!g\e(B 0) | |
bd09f27b KH |
739 | (?\e$(5!h\e(B 0 (br . tr)) |
740 | (?\e$(5!i\e(B 0 (br . tr)) | |
4ed46869 KH |
741 | (?\e$(5!j\e(B 0) |
742 | (nil 0) | |
743 | (nil 0) | |
744 | (nil 0) | |
745 | (nil 0) | |
746 | (nil 0) | |
747 | (nil 0) | |
748 | (?\e$(5!q\e(B 0) | |
749 | (?\e$(5!r\e(B 0) | |
750 | (?\e$(5!s\e(B 0) | |
751 | (?\e$(5!t\e(B 0) | |
752 | (?\e$(5!u\e(B 0) | |
753 | (?\e$(5!v\e(B 0) | |
754 | (?\e$(5!w\e(B 0) | |
755 | (?\e$(5!x\e(B 0) | |
756 | (?\e$(5!y\e(B 0) | |
757 | (?\e$(5!z\e(B 0) | |
758 | (nil 0) | |
759 | (nil 0) | |
760 | (nil 0) | |
761 | (nil 0) | |
762 | (?\e$(5"!\e(B 0) | |
763 | (?\e$(5""\e(B 0) | |
764 | (?\e$(5"#\e(B 0) | |
765 | (?\e$(5"$\e(B 0) | |
766 | (?\e$(5"%\e(B 0) | |
767 | (?\e$(5"&\e(B 0) | |
768 | (?\e$(5"'\e(B 0) | |
769 | (?\e$(5"(\e(B 0) | |
770 | (?\e$(5")\e(B 0) | |
771 | (?\e$(5"*\e(B 0) | |
772 | (?\e$(5"+\e(B 0) | |
773 | (?\e$(5",\e(B 0) | |
774 | (?\e$(5"-\e(B 0) | |
775 | (?\e$(5".\e(B 0) | |
776 | (?\e$(5"/\e(B 0) | |
777 | (?\e$(5"0\e(B 0) | |
778 | (?\e$(5"1\e(B 0) | |
779 | (?\e$(5"2\e(B 0) | |
780 | (?\e$(5"3\e(B 0) | |
781 | (?\e$(5"4\e(B 0) | |
782 | (?\e$(5"5\e(B 0) | |
783 | (?\e$(5"6\e(B 0) | |
784 | (?\e$(5"7\e(B 0) | |
785 | (?\e$(5"8\e(B 0) | |
786 | (?\e$(5"9\e(B 0) | |
787 | (?\e$(5":\e(B 0) | |
788 | (?\e$(5";\e(B 0) | |
789 | (?\e$(5"<\e(B 0) | |
790 | (?\e$(5"=\e(B 0) | |
791 | (?\e$(5">\e(B 0) | |
792 | (?\e$(5"?\e(B 0) | |
793 | (?\e$(5"@\e(B 0) | |
794 | (?\e$(5"A\e(B 0) | |
795 | (?\e$(5"B\e(B 0) | |
796 | (?\e$(5"C\e(B 0) | |
797 | (?\e$(5"D\e(B 0) | |
798 | (?\e$(5"E\e(B 0) | |
799 | (?\e$(5"F\e(B 0) | |
800 | (?\e$(5"G\e(B 0) | |
801 | (?\e$(5"H\e(B 0) | |
802 | (?\e$(5"I\e(B 0) | |
803 | (?\e$(5"J\e(B 0) | |
804 | (?\e$(5"K\e(B 0) | |
805 | (?\e$(5"L\e(B 0) | |
806 | (?\e$(5"M\e(B 0) | |
807 | (?\e$(5"N\e(B 0) | |
808 | (?\e$(5"O\e(B 0) | |
809 | (?\e$(5"P\e(B 0) | |
810 | (?\e$(5"Q\e(B 0) | |
811 | (?\e$(5"R\e(B 0) | |
812 | (?\e$(5"S\e(B 0) | |
813 | (?\e$(5"T\e(B 0) | |
814 | (?\e$(5"U\e(B 0) | |
815 | (?\e$(5"V\e(B 0) | |
816 | (?\e$(5"W\e(B 0) | |
817 | (?\e$(5"X\e(B 0) | |
818 | (?\e$(5"Y\e(B 0) | |
819 | (?\e$(5"Z\e(B 0) | |
820 | (?\e$(5"[\e(B 0) | |
821 | (?\e$(5"\\e(B 0) | |
822 | (?\e$(5"]\e(B 0) | |
823 | (?\e$(5"^\e(B 0) | |
824 | (?\e$(5"_\e(B 0) | |
825 | (?\e$(5"`\e(B 0) | |
826 | (?\e$(5"a\e(B 0) | |
827 | (?\e$(5"b\e(B 0) | |
828 | (?\e$(5"c\e(B 0) | |
829 | (?\e$(5"d\e(B 0) | |
830 | (?\e$(5"e\e(B 0) | |
831 | (?\e$(5"f\e(B 0) | |
832 | (?\e$(5"g\e(B 0) | |
833 | (?\e$(5"h\e(B 0) | |
834 | (?\e$(5"i\e(B 0) | |
835 | (?\e$(5"j\e(B 0) | |
836 | (?\e$(5"k\e(B 0) | |
837 | (?\e$(5"l\e(B 0) | |
838 | (?\e$(5"m\e(B 0) | |
839 | (?\e$(5"n\e(B 0) | |
840 | (?\e$(5"o\e(B 0) | |
37cdc7ad KH |
841 | (?\e$(5"p\e(B 10 (mr . mr)) |
842 | (?\e$(5"q\e(B 0 (br . br)) | |
843 | (?\e$(5"r\e(B 0 (br . tr)) | |
4ed46869 KH |
844 | (?\e$(5"s\e(B 0) |
845 | (?\e$(5"t\e(B 0) | |
846 | (?\e$(5"u\e(B 0) | |
847 | (?\e$(5"v\e(B 0) | |
848 | (?\e$(5"w\e(B 0) | |
849 | (?\e$(5"x\e(B 0) | |
850 | (?\e$(5"y\e(B 0) | |
851 | (?\e$(5"z\e(B 0) | |
852 | (?\e$(5"{\e(B 0) | |
853 | (?\e$(5"|\e(B 0) | |
854 | (?\e$(5"}\e(B 0) | |
855 | (?\e$(5"~\e(B 0) | |
856 | (?\e$(5#!\e(B 0) | |
857 | (?\e$(5#"\e(B 0) | |
858 | (?\e$(5##\e(B 0) | |
859 | (?\e$(5#$\e(B 0) | |
860 | (?\e$(5#%\e(B 0) | |
861 | (?\e$(5#&\e(B 0) | |
862 | (?\e$(5#'\e(B 0) | |
863 | (?\e$(5#(\e(B 0) | |
864 | (?\e$(5#)\e(B 0) | |
865 | (?\e$(5#*\e(B 0) | |
866 | (?\e$(5#+\e(B 0) | |
867 | (?\e$(5#,\e(B 0) | |
868 | (?\e$(5#-\e(B 0) | |
869 | (?\e$(5#.\e(B 0) | |
870 | (?\e$(5#/\e(B 0) | |
871 | (?\e$(5#0\e(B 0) | |
872 | (?\e$(5#1\e(B 0) | |
873 | (?\e$(5#2\e(B 0) | |
874 | (?\e$(5#3\e(B 0) | |
875 | (?\e$(5#4\e(B 0) | |
876 | (?\e$(5#5\e(B 0) | |
877 | (?\e$(5#6\e(B 0) | |
878 | (?\e$(5#7\e(B 0) | |
879 | (?\e$(5#8\e(B 0) | |
880 | (?\e$(5#9\e(B 0) | |
881 | (?\e$(5#:\e(B 0) | |
882 | (?\e$(5#;\e(B 0) | |
883 | (?\e$(5#<\e(B 0) | |
884 | (?\e$(5#=\e(B 0) | |
885 | (?\e$(5#>\e(B 0) | |
886 | (?\e$(5#?\e(B 0) | |
887 | (?\e$(5#@\e(B 0) | |
888 | (?\e$(5#A\e(B 0) | |
889 | (?\e$(5#B\e(B 0) | |
890 | (?\e$(5#C\e(B 0) | |
891 | (?\e$(5#D\e(B 0) | |
892 | (?\e$(5#E\e(B 0) | |
893 | (?\e$(5#F\e(B 0) | |
894 | (?\e$(5#G\e(B 0) | |
895 | (?\e$(5#H\e(B 0) | |
896 | (?\e$(5#I\e(B 0) | |
897 | (?\e$(5#J\e(B 0) | |
37cdc7ad KH |
898 | (?\e$(5#K\e(B 0 (br . tr)) |
899 | (?\e$(5#L\e(B 0 (br . tr)) | |
900 | (?\e$(5#M\e(B 0 (br . tr)) | |
4ed46869 KH |
901 | (?\e$(5#N\e(B 0) |
902 | (?\e$(5#O\e(B 0) | |
903 | (?\e$(5#P\e(B 0) | |
904 | (?\e$(5#Q\e(B 0) | |
905 | (?\e$(5#R\e(B 0) | |
906 | (?\e$(5#S\e(B 0) | |
907 | (?\e$(5#T\e(B 0) | |
908 | (?\e$(5#U\e(B 0) | |
909 | (?\e$(5#V\e(B 0) | |
910 | (?\e$(5#W\e(B 0) | |
911 | (?\e$(5#X\e(B 0) | |
912 | (?\e$(5#Y\e(B 0) | |
913 | (?\e$(5#Z\e(B 0) | |
914 | (?\e$(5#[\e(B 0) | |
915 | (?\e$(5#\\e(B 0) | |
916 | (?\e$(5#]\e(B 0) | |
917 | (?\e$(5#^\e(B 0) | |
918 | (?\e$(5#_\e(B 0) | |
919 | (?\e$(5#`\e(B 0) | |
920 | (?\e$(5#a\e(B 0) | |
921 | (?\e$(5#b\e(B 0) | |
922 | (?\e$(5#c\e(B 0) | |
923 | (?\e$(5#d\e(B 0) | |
924 | (?\e$(5#e\e(B 0) | |
925 | (?\e$(5#f\e(B 0) | |
926 | (?\e$(5#g\e(B 0) | |
927 | (?\e$(5#h\e(B 0) | |
928 | (?\e$(5#i\e(B 0) | |
929 | (?\e$(5#j\e(B 0) | |
930 | (?\e$(5#k\e(B 0) | |
931 | (?\e$(5#l\e(B 0) | |
932 | (?\e$(5#m\e(B 0) | |
933 | (?\e$(5#n\e(B 0) | |
934 | (?\e$(5#o\e(B 0) | |
935 | (?\e$(5#p\e(B 0) | |
936 | (?\e$(5#q\e(B 0) | |
937 | (?\e$(5#r\e(B 0) | |
938 | (?\e$(5#s\e(B 0) | |
939 | (?\e$(5#t\e(B 0) | |
940 | (?\e$(5#u\e(B 0) | |
941 | (?\e$(5#v\e(B 0) | |
942 | (?\e$(5#w\e(B 0) | |
943 | (?\e$(5#x\e(B 0) | |
944 | (?\e$(5#y\e(B 0) | |
945 | (?\e$(5#z\e(B 0) | |
946 | (?\e$(5#{\e(B 0) | |
947 | (?\e$(5#|\e(B 0) | |
948 | (?\e$(5#}\e(B 0) | |
949 | (?\e$(5#~\e(B 0) | |
950 | (?\e$(5$!\e(B 0) | |
951 | (?\e$(5$"\e(B 0) | |
952 | (?\e$(5$#\e(B 0) | |
953 | (?\e$(5$$\e(B 0) | |
954 | (?\e$(5$%\e(B 0) | |
955 | (?\e$(5$&\e(B 0) | |
956 | (?\e$(5$'\e(B 0) | |
957 | (?\e$(5$(\e(B 0) | |
958 | (?\e$(5$)\e(B 0) | |
959 | (?\e$(5$*\e(B 0) | |
960 | (?\e$(5$+\e(B 0) | |
961 | (?\e$(5$,\e(B 0) | |
962 | (?\e$(5$-\e(B 0) | |
963 | (?\e$(5$.\e(B 0) | |
964 | (?\e$(5$/\e(B 0) | |
965 | (?\e$(5$0\e(B 0) | |
966 | (?\e$(5$1\e(B 0) | |
967 | (?\e$(5$2\e(B 0) | |
968 | (?\e$(5$3\e(B 0) | |
969 | (?\e$(5$4\e(B 0) | |
970 | (?\e$(5$5\e(B 0) | |
971 | (?\e$(5$6\e(B 0) | |
972 | (?\e$(5$7\e(B 0) | |
973 | (?\e$(5$8\e(B 0) | |
974 | (?\e$(5$9\e(B 0) | |
975 | (?\e$(5$:\e(B 0) | |
976 | (?\e$(5$;\e(B 0) | |
977 | (?\e$(5$<\e(B 0) | |
978 | (?\e$(5$=\e(B 0) | |
979 | (?\e$(5$>\e(B 0) | |
980 | (?\e$(5$?\e(B 0) | |
981 | (?\e$(5$@\e(B 0) | |
982 | (?\e$(5$A\e(B 0) | |
983 | (?\e$(5$B\e(B 0) | |
984 | (?\e$(5$C\e(B 0) | |
985 | (?\e$(5$D\e(B 0) | |
986 | (?\e$(5$E\e(B 0) | |
987 | (?\e$(5$F\e(B 0) | |
988 | (?\e$(5$G\e(B 0) | |
989 | (?\e$(5$H\e(B 0) | |
990 | (?\e$(5$I\e(B 0) | |
991 | (?\e$(5$J\e(B 0) | |
992 | (?\e$(5$K\e(B 0) | |
993 | (?\e$(5$L\e(B 0) | |
994 | (?\e$(5$M\e(B 0) | |
995 | (?\e$(5$N\e(B 0) | |
996 | (?\e$(5$O\e(B 0) | |
997 | (?\e$(5$P\e(B 0) | |
998 | (?\e$(5$Q\e(B 0) | |
999 | (?\e$(5$R\e(B 0) | |
1000 | (?\e$(5$S\e(B 0) | |
1001 | (?\e$(5$T\e(B 0) | |
1002 | (?\e$(5$U\e(B 0) | |
1003 | (?\e$(5$V\e(B 0) | |
1004 | (?\e$(5$W\e(B 0) | |
1005 | (?\e$(5$X\e(B 0) | |
1006 | (?\e$(5$Y\e(B 0) | |
1007 | (?\e$(5$Z\e(B 0) | |
1008 | (?\e$(5$[\e(B 0) | |
1009 | (?\e$(5$\\e(B 0) | |
1010 | (?\e$(5$]\e(B 0) | |
1011 | (?\e$(5$^\e(B 0) | |
1012 | (?\e$(5$_\e(B 0) | |
1013 | (?\e$(5$`\e(B 0) | |
1014 | (?\e$(5$a\e(B 0) | |
1015 | (?\e$(5$b\e(B 0) | |
1016 | (?\e$(5$c\e(B 0) | |
1017 | (?\e$(5$d\e(B 0) | |
1018 | (?\e$(5$e\e(B 0) | |
1019 | (?\e$(5$f\e(B 0) | |
1020 | (?\e$(5$g\e(B 0) | |
1021 | (?\e$(5$h\e(B 0) | |
1022 | (?\e$(5$i\e(B 0) | |
1023 | (?\e$(5$j\e(B 0) | |
1024 | (?\e$(5$k\e(B 0) | |
1025 | (?\e$(5$l\e(B 0) | |
1026 | (?\e$(5$m\e(B 0) | |
1027 | (?\e$(5$n\e(B 0) | |
1028 | (?\e$(5$o\e(B 0) | |
1029 | (?\e$(5$p\e(B 0) | |
1030 | (?\e$(5$q\e(B 0) | |
1031 | (?\e$(5$r\e(B 0) | |
1032 | (?\e$(5$s\e(B 0) | |
1033 | (?\e$(5$t\e(B 0) | |
1034 | (?\e$(5$u\e(B 0) | |
1035 | (?\e$(5$v\e(B 0) | |
1036 | (?\e$(5$w\e(B 0) | |
1037 | (?\e$(5$x\e(B 0) | |
1038 | (?\e$(5$y\e(B 0) | |
1039 | (?\e$(5$z\e(B 0) | |
1040 | (?\e$(5${\e(B 0) | |
1041 | (?\e$(5$|\e(B 0) | |
1042 | (?\e$(5$}\e(B 0) | |
1043 | (?\e$(5$~\e(B 0) | |
1044 | )) | |
1045 | ||
1046 | ;; Determine composition priority and rule of the array of Glyphs. | |
1047 | ;; Sort the glyphs with their priority. | |
1048 | ||
31ce7719 KH |
(defun devanagari-reorder-glyphs-for-composition (string start end)
  "Return the composition rule entries for the glyphs of STRING.
Only the glyphs from index START (inclusive) to END (exclusive) are
looked at.  Each glyph is replaced by its entry in
`devanagari-composition-rules' (nil when the glyph has no entry), and
the entries are sorted by ascending application priority, which is
the second element of an entry."
  (let ((pos start)
        ;; Collected in reverse order and flipped before sorting.
        (entries nil))
    (while (< pos end)
      (setq entries (cons (assq (aref string pos)
                                devanagari-composition-rules)
                          entries)
            pos (1+ pos)))
    (sort (nreverse entries)
          (function (lambda (x y) (< (car (cdr x)) (car (cdr y))))))))
1059 | ||
31ce7719 | 1060 | ;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e4\e$(6!Xv#"5t%![\e0!X"5![\e1\e(B" |
4ed46869 KH |
1061 | |
(defun devanagari-compose-to-one-glyph (devanagari-string)
  "Compose the glyphs of DEVANAGARI-STRING into one composite glyph.
The composition components are computed by
`devanagari-composition-component', which also converts glyphs to
1-column width if possible.  When a single component results, it is
returned as a one-character string; otherwise the value of
`compose-chars' applied to the components is returned."
  (let ((cmp-glyph-list
         (devanagari-composition-component devanagari-string)))
    (if (= (length cmp-glyph-list) 1)
        (char-to-string (car cmp-glyph-list))
      (apply 'compose-chars cmp-glyph-list))))
1082 | ||
31ce7719 KH |
(defun devanagari-composition-component (string &optional start end)
  "Return the composition components for the glyphs of STRING.
START and END delimit the part of STRING to use; they default to 0
and the length of STRING.

The glyphs are ordered by `devanagari-reorder-glyphs-for-composition'.
The result is a list of the form (GLYPH RULE GLYPH RULE GLYPH ...),
where each RULE is the composition rule of the glyph that follows it,
passed through `devanagari-wide-to-narrow'."
  (or start (setq start 0))
  (or end (setq end (length string)))
  (let* ((entries (devanagari-reorder-glyphs-for-composition
                   string start end))
         ;; Components are collected in reverse order, starting with the
         ;; first glyph, which takes no rule.
         (components (list (car (car entries)))))
    (setq entries (cdr entries))
    (while entries
      (let ((entry (car entries)))
        (setq components
              (cons (car entry)
                    (cons (if (< 2 (length entry))
                              ;; composition with the rule given in the entry
                              (car (cdr (cdr entry)))
                            ;; default composition
                            '(mr . ml))
                          components))))
      (setq entries (cdr entries)))
    ;; Convert glyphs to 1-column width if possible.
    (devanagari-wide-to-narrow (nreverse components))))
1102 | ||
bd09f27b | 1103 | ;; Utility function for Phase 2.5 |
31ce7719 KH |
1104 | |
1105 | ;; Check whether GLYPH is a Devanagari vertical modifier or not. | |
bd09f27b KH |
1106 | ;; If it is a vertical modifier, whether it should be 1-column shape or not |
1107 | ;; depends on previous non-vertical modifier. | |
bd09f27b KH |
(defun devanagari-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is a Devanagari vertical modifier glyph."
  ;; The bracket expression is the regexp and GLYPH is the text it is
  ;; matched against, so GLYPH is never interpreted as a regexp itself.
  (string-match "[\e$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M\e(B]"
                (char-to-string glyph)))
bd09f27b KH |
1111 | |
(defun devanagari-non-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is a Devanagari non-vertical modifier glyph."
  ;; Wider set of glyphs, currently disabled:
  ;;		"[\e$(5!Z![!\!d!e!f!g\e(B]"
  ;; The bracket expression is the regexp and GLYPH is the text it is
  ;; matched against, so GLYPH is never interpreted as a regexp itself.
  (string-match "[\e$(5![\e(B]" (char-to-string glyph)))
bd09f27b | 1116 | |
(defun devanagari-wide-to-narrow-char (char)
  "Convert Devanagari character CHAR to the corresponding narrow character.
The narrow character is looked up in `devanagari-1-column-char'.
If there's no corresponding narrow character, return CHAR as is."
  (or (cdr (assq char devanagari-1-column-char))
      char))
4ed46869 KH |
1122 | |
1123 | ;; | |
31ce7719 | 1124 | ;; Phase 2.5 Convert appropriate character to 1-column shape. |
4ed46869 KH |
1125 | ;; |
1126 | ;; This is temporary and should be removed out when Emacs supports | |
1127 | ;; variable width characters. | |
1128 | ;; | |
1129 | ;; This will convert the composing glyphs (2 column glyphs) | |
1130 | ;; to narrow (1 column) glyphs if they exist. | |
1131 | ;; | |
1132 | ;; devanagari-wide-to-narrow-old converts glyphs simply. | |
1133 | ;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs | |
1134 | ;; with 2 column base-glyph. | |
1135 | ;; | |
1136 | ;; Execution Examples | |
bd09f27b KH |
1137 | ;;(devanagari-wide-to-narrow '(?\e$(5!3\e(B (ml . ml) ?\e$(5!a\e(B)) |
1138 | ;;(devanagari-wide-to-narrow '(?\e$(5!F\e(B (ml . ml) ?\e$(5!a\e(B)) | |
1139 | ||
(defun devanagari-wide-to-narrow (src-list)
  "Convert the glyphs in SRC-LIST to 1-column glyphs where possible.
SRC-LIST is processed by `devanagari-wide-to-narrow-iter', starting
with its 2-column flag set to t."
  (let ((initial-2-col-flag t))
    (devanagari-wide-to-narrow-iter src-list initial-2-col-flag)))
1142 | ||
(defun devanagari-wide-to-narrow-iter (src-list 2-col-glyph)
  "Return a list like SRC-LIST with glyphs converted to narrow ones.
2-COL-GLYPH is the initial value of a flag that is updated at every
normal glyph: it becomes nil when that glyph has a narrow form in
`devanagari-1-column-char', and t otherwise.

Elements of SRC-LIST are handled as follows:
 - an element that is not a number is copied as is;
 - a non-vertical modifier is always converted to its narrow form;
 - a vertical modifier is kept as is while the flag is non-nil, and
   converted to its narrow form while the flag is nil;
 - any other (normal) glyph is converted if it has a narrow form."
  (let ((wide-base 2-col-glyph)
        (converted nil)
        glyph)
    (while src-list
      (setq glyph (car src-list)
            src-list (cdr src-list))
      (setq converted
            (cons
             (cond
              ;; not glyph code
              ((not (numberp glyph))
               glyph)
              ;; glyphs to be processed regardless of the flag
              ((devanagari-non-vertical-modifier-p glyph)
               (devanagari-wide-to-narrow-char glyph))
              ;; glyphs which depend on the flag
              ((devanagari-vertical-modifier-p glyph)
               (if wide-base
                   glyph
                 (devanagari-wide-to-narrow-char glyph)))
              ;; normal glyph: it decides the flag for what follows
              (t
               (setq wide-base
                     (not (cdr (assq glyph devanagari-1-column-char))))
               (if wide-base
                   glyph
                 (devanagari-wide-to-narrow-char glyph))))
             converted)))
    (nreverse converted)))
1169 | ||
4ed46869 KH |
1170 | |
1171 | ;; | |
1172 | ;; Summary | |
1173 | ;; | |
1174 | ||
4ed46869 | 1175 | ;; |
31ce7719 | 1176 | ;; Decomposition of composite sequence. |
4ed46869 KH |
1177 | ;; |
1178 | ||
4ed46869 KH |
1179 | ;;;###autoload |
(defun devanagari-decompose-string (str)
  "Decompose Devanagari string STR.
`decompose-string' is applied to a copy of STR, and its value is
returned."
  (let ((copy (copy-sequence str)))
    (decompose-string copy)))
4ed46869 KH |
1183 | |
1184 | ;;;###autoload | |
(defun devanagari-decompose-region (from to)
  "Decompose the text between FROM and TO.
This simply calls `decompose-region' on FROM and TO.
When called interactively, FROM and TO are the bounds of the region."
  (interactive "r")
  (decompose-region from to))
4ed46869 | 1188 | |
37cdc7ad KH |
1189 | ;;; |
1190 | ;;; Composition | |
1191 | ;;; | |
1192 | ||
1193 | ;;;###autoload | |
(defun devanagari-compose-string (str &rest langs)
  "Return a copy of STR in which Devanagari glyph units are composed.
Every part of the copy that matches `devanagari-composite-glyph-unit'
is converted to glyphs by `char-to-glyph-devanagari', with LANGS
passed along as the rule specifications, and then composed with
`compose-string' using the components computed by
`devanagari-composition-component'.  STR itself is not modified."
  (setq str (copy-sequence str))
  (let ((idx 0))
    (while (string-match devanagari-composite-glyph-unit str idx)
      ;; Save the match bounds first: the conversion below runs
      ;; `string-match' again and clobbers the match data.
      (let* ((match-b (match-beginning 0))
             (match-e (match-end 0))
             (cmps (devanagari-composition-component
                    (apply 'char-to-glyph-devanagari
                           (substring str match-b match-e)
                           langs))))
        (compose-string str match-b match-e cmps)
        (setq idx match-e))))
  str)
4ed46869 | 1208 | |
37cdc7ad KH |
1209 | ;;;###autoload |
(defun devanagari-compose-region (from to &rest langs)
  "Compose the Devanagari glyph units in the text between FROM and TO.
Every match of `devanagari-composite-glyph-unit' is converted to glyphs
by `char-to-glyph-devanagari', with LANGS passed along as the rule
specifications, and composed with `compose-region' using the components
computed by `devanagari-composition-component'.
Point and the current restriction are preserved."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward devanagari-composite-glyph-unit nil t)
        ;; Save the match bounds first: the conversion below clobbers
        ;; the match data.
        (let ((unit-start (match-beginning 0))
              (unit-end (match-end 0)))
          (compose-region
           unit-start unit-end
           (devanagari-composition-component
            (apply 'char-to-glyph-devanagari
                   (buffer-substring unit-start unit-end)
                   langs))))))))
4ed46869 KH |
1223 | |
1224 | ;; For pre-write and post-read conversion | |
1225 | ||
1226 | ;;;###autoload | |
(defun devanagari-compose-from-is13194-region (from to)
  "Compose IS 13194 characters in the region to Devanagari characters.
The text between FROM and TO is first processed by
`indian-to-devanagari-region' and then by `devanagari-compose-region'.
The value is the size of the region after both steps."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      ;; The bounds are read again before each step, because a step may
      ;; change the size of the text.
      (indian-to-devanagari-region (point-min) (point-max))
      (devanagari-compose-region (point-min) (point-max))
      (let ((beg (point-min))
            (end (point-max)))
        (- end beg)))))
4ed46869 | 1236 | |
cefa701a KH |
1237 | ;;;###autoload |
(defun in-is13194-devanagari-post-read-conversion (len)
  "Compose the LEN characters following point from IS 13194 to Devanagari.
The value is that of `devanagari-compose-from-is13194-region'."
  (let* ((start (point))
         (end (+ start len)))
    (devanagari-compose-from-is13194-region start end)))
cefa701a | 1241 | |
4ed46869 KH |
1242 | ;;;###autoload |
(defun devanagari-decompose-to-is13194-region (from to)
  "Decompose Devanagari characters in the region to IS 13194 characters.
The text between FROM and TO is first processed by
`devanagari-decompose-region' and then by `devanagari-to-indian-region'."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      ;; The bounds are read again before each step, because a step may
      ;; change the size of the text.
      (devanagari-decompose-region (point-min) (point-max))
      (devanagari-to-indian-region (point-min) (point-max)))))
4ed46869 | 1251 | |
cefa701a KH |
1252 | ;;;###autoload |
(defun in-is13194-devanagari-pre-write-conversion (from to)
  "Prepare text for writing by decomposing it to IS 13194 characters.
If FROM is a string, that string is the text; otherwise the text is
the part of the current buffer between FROM and TO.  The text is put
into a newly generated buffer, which is made current and left current,
and is converted there by `devanagari-decompose-to-is13194-region'.
The value is always nil."
  (let ((source-buffer (current-buffer))
        (work-buffer (generate-new-buffer " *temp*")))
    (set-buffer work-buffer)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source-buffer from to)))
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))
37cdc7ad KH |
1262 | |
1263 | ;; For input/output of ITRANS | |
1264 | ||
1265 | ;;;###autoload | |
(defun devanagari-encode-itrans-region (from to)
  "Encode the Devanagari text between FROM and TO for ITRANS output.
The text is first processed by `devanagari-decompose-to-is13194-region'
and then by `indian-encode-itrans-region'.
The current restriction is preserved."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; The bounds are read again before each step, because a step may
    ;; change the size of the text.
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    (indian-encode-itrans-region (point-min) (point-max))))
1272 | ||
1273 | ;;;###autoload | |
(defun devanagari-decode-itrans-region (from to)
  "Decode the ITRANS text between FROM and TO to Devanagari.
The text is first processed by `indian-decode-itrans-region' and then
by `devanagari-compose-from-is13194-region', whose value is returned.
The current restriction is preserved."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; The bounds are read again before each step, because a step may
    ;; change the size of the text.
    (indian-decode-itrans-region (point-min) (point-max))
    (devanagari-compose-from-is13194-region (point-min) (point-max))))
1280 | ||
4ed46869 | 1281 | ;; |
650e8505 | 1282 | (provide 'devan-util) |
4ed46869 | 1283 | |
a604c5af KH |
1284 | ;; Local Variables: |
1285 | ;; coding: iso-2022-7bit | |
1286 | ;; End: | |
e8af40ee PJ |
1287 | |
1288 | ;;; devan-util.el ends here |