Commit | Line | Data |
---|---|---|
4ed46869 KH |
1 | ;;; devan-util.el --- Support for Devanagari Script Composition |
2 | ||
3 | ;; Copyright (C) 1996 Free Software Foundation, Inc. | |
4 | ||
5 | ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
6 | ||
7 | ;; Keywords: multilingual, Indian, Devanagari | |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published by | |
13 | ;; the Free Software Foundation; either version 2, or (at your option) | |
14 | ;; any later version. | |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
e803d6bd KH |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
23 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;; History: | |
29 | ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
37cdc7ad KH |
30 | ;; 1997.3.24 fixed some bugs. |
31 | ||
32 | ;; Future work :: | |
33 | ;; Decompose the input characters and process them on the character basis. | |
4ed46869 KH |
34 | |
35 | ;; Devanagari script composition rules and related programs. | |
36 | ||
37 | ;;; Code: | |
38 | ||
39 | ;;; | |
40 | ;;; Steps toward composition of Devanagari Characters. | |
41 | ;;; | |
42 | ||
335a7ad7 KH |
;;;###autoload
(defun setup-devanagari-environment ()
  "Set up the multilingual environment (MULE) for languages using Devanagari.
This selects the \"Devanagari\" language environment."
  (interactive)
  (set-language-environment "Devanagari"))
48 | ||
4ed46869 KH |
49 | ;;; Basic functions. |
50 | ||
;;;###autoload
(defun indian-to-devanagari (ch)
  "Return the Devanagari basic character for the IS 13194 character CH.
A character that does not belong to the `indian-is13194' charset is
returned unchanged."
  (let ((parts (split-char ch)))
    (cond ((eq (car parts) 'indian-is13194)
           ;; Keep the IS 13194 position code and use it as the second
           ;; code of an `indian-2-column' character whose first code
           ;; is ?\x21.
           (make-char 'indian-2-column ?\x21 (car (cdr parts))))
          (t ch))))
58 | ||
;;;###autoload
(defun devanagari-to-indian (ch)
  "Return the IS 13194 character for the Devanagari basic character CH.
CH is converted only when it belongs to the `indian-2-column' charset
and its first position code is ?\\x21; its second position code then
becomes the code of the `indian-is13194' character.  Any other
character is returned unchanged."
  ;; The former `charset' and `code-h' bindings were never read, so
  ;; only the split character codes are kept.
  (let ((charcodes (split-char ch)))
    (if (and (eq (car charcodes) 'indian-2-column)
             (= (nth 1 charcodes) ?\x21))
        (make-char 'indian-is13194 (nth 2 charcodes))
      ch)))
69 | ||
;;;###autoload
(defun indian-to-devanagari-region (from to)
  "Convert IS 13194 characters between FROM and TO to Devanagari basic characters.
Every character matched by the regexp \".\" in the region is replaced
by the result of `indian-to-devanagari'.  When called interactively,
operate on the current region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; A category-based search was used here once and is kept for reference:
    ;; (while (re-search-forward "\\cd" nil t)
    (while (re-search-forward "." nil t)
      ;; Point is just after the matched character: convert it, then
      ;; swap the original for the converted one.
      (let ((converted (indian-to-devanagari (char-before))))
        (delete-region (1- (point)) (point))
        (insert converted)))))
82 | ||
;;;###autoload
(defun devanagari-to-indian-region (from to)
  "Convert Devanagari basic characters between FROM and TO to Indian characters.
Every character matched by the regexp \".\" in the region is replaced
by the result of `devanagari-to-indian'.  When called interactively,
operate on the current region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; A category-based search was used here once and is kept for reference:
    ;; (while (re-search-forward "\\cD" nil t) ; Devanagari Character Code.
    (while (re-search-forward "." nil t)
      ;; Point is just after the matched character: convert it, then
      ;; swap the original for the converted one.
      (let ((converted (devanagari-to-indian (char-before))))
        (delete-region (1- (point)) (point))
        (insert converted)))))
95 | ||
;;;###autoload
(defun indian-to-devanagari-string (str)
  "Return a new string made by converting each character of STR.
Each character is passed through `indian-to-devanagari'; STR itself
is not modified."
  ;; `mapcar' walks the string character by character, and `concat'
  ;; turns the resulting list of characters back into a string.
  (concat (mapcar #'indian-to-devanagari str)))
4ed46869 KH |
106 | |
107 | ;; Phase 0 - Determine whether the characters can be composed. | |
108 | ;; | |
109 | ;;; | |
110 | ;;; Regular expressions to split characters for composition. | |
111 | ;;; | |
112 | ;; | |
113 | ;; Indian script word contains one or more syllables. | |
114 | ;; In BNF, it can be expressed as follows: | |
115 | ;; | |
116 | ;; Word ::= {Syllable} [Cons-Syllable] | |
117 | ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable | |
118 | ;; Vowel-Syllable ::= V[D] | |
119 | ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D] | |
bd09f27b | 120 | ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons |
4ed46869 KH |
121 | ;; Pure-Cons ::= Full-Cons H |
122 | ;; Full-Cons ::= C [N] | |
123 | ;; | |
124 | ;; {} repeat, [] optional | |
125 | ;; | |
126 | ;; C - Consonant (\e$(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E\e(B | |
127 | ;; \e$(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X\e(B) | |
128 | ;; N - Nukta (\e$(5!i\e(B) | |
bd09f27b KH |
129 | ;; H - Halant(\e$(5!h\e(B) or Virama |
130 | ;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*\e(B) | |
131 | ;; ("\e$(5#&#'#*\e(B" can be obtained by IS13194 vowels with nukta.) | |
37cdc7ad KH |
132 | ;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu (\e$(5!!!"\e(B) |
133 | ;; (Visarga (\e$(5!#\e(B) is excluded.) | |
bd09f27b KH |
134 | ;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M\e(B) |
135 | ;; ("\e$(5#K#L#M\e(B" can be obtained by IS13194 matras with nukta.) | |
4ed46869 | 136 | ;; |
37cdc7ad KH |
137 | ;; In Emacs, one syllable of Indian language is considered to be one |
138 | ;; composite glyph. If we expand the above expression for | |
139 | ;; cons-vowel-syllable, it would be: | |
4ed46869 | 140 | ;; |
37cdc7ad | 141 | ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D] |
4ed46869 | 142 | ;; |
37cdc7ad | 143 | ;; Therefore, in worst case, the one syllable may contain |
4ed46869 KH |
144 | ;; following characters. |
145 | ;; | |
bd09f27b KH |
146 | ;; C N H C N H C N H C N H C N M D |
147 | ;; | |
37cdc7ad KH |
148 | ;; An example is the Sanskrit word "kArtsnya", in which five consecutive | |
149 | ;; consonants appear. | |
4ed46869 | 150 | ;; |
37cdc7ad KH |
151 | ;; On the other hand, consonant-syllable, which appears at the end of |
152 | ;; the word, would have the following expression: | |
4ed46869 | 153 | ;; |
bd09f27b | 154 | ;; [C [N] H] [C [N] H] [C [N] H] C [N] H |
4ed46869 KH |
155 | ;; |
156 | ;; This is acceptable BEFORE proper consonant-syllable is input. The | |
157 | ;; string which doesn't match with the above expression is invalid and | |
158 | ;; thus must be fixed. | |
159 | ;; | |
160 | ;; Note: | |
907c83c8 | 161 | ;; A third case can be considered: an acceptable syllable to which |
4ed46869 KH |
162 | ;; no further code can be added. |
163 | ;; | |
bd09f27b | 164 | ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D |
4ed46869 KH |
165 | ;; |
166 | ;; However, to make editing possible even in this condition, we will | |
167 | ;; not consider this case. | |
37cdc7ad KH |
168 | ;; |
169 | ;; Note: | |
170 | ;; Currently, it seems that only the following consonants can have | |
171 | ;; a Nukta sign attached. | |
172 | ;; (\e$(5!3!4!5!:!?!@!I\e(B) | |
173 | ;; Therefore, [\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B? can be re-written as | |
174 | ;; \\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X\e(B] | |
175 | ||
176 | (defconst devanagari-full-cons | |
177 | "\\(\\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X$.$E"%\e(B]\\)" | |
178 | "Devanagari full consonant") | |
179 | ||
(defconst devanagari-pure-cons
  (concat "\\(" devanagari-full-cons "\e$(5!h\e(B\\)")
  "Regexp matching a Devanagari pure consonant.
A pure consonant is a full consonant (see `devanagari-full-cons')
followed by the halant sign.")
183 | ||
184 | (defconst devanagari-matra | |
185 | "\\(\\([\e$(5!_![!\\e(B]\e$(5!i\e(B\\)\\|[\e$(5!Z\e(B-\e$(5!g#K#L#M\e(B]\\)" | |
186 | "Devanagari Matra Signs. '\e$(5#K#L#M\e(B' can also be created from the combination | |
187 | of '\e$(5!_![!\\e(B' and nukta sign.") | |
188 | ||
189 | (defconst devanagari-vowel | |
190 | "\\(\\([\e$(5!*!&!'\e(B]\e$(5!i\e(B\\)\\|[\e$(5!$\e(B-\e$(5!2#&#'#*\e(B]\\)" | |
191 | "Devanagari Vowels. '\e$(5#&#'#*\e(B' can also be created from the combination | |
192 | of '\e$(5!*!&!'\e(B' and nukta sign.") | |
193 | ||
194 | (defconst devanagari-vowel-syllable | |
195 | (concat devanagari-vowel "[\e$(5!!!"\e(B]?") | |
196 | "Devanagari vowel syllable.") | |
197 | ||
(defconst devanagari-cons-syllable
  (concat devanagari-pure-cons "?" devanagari-pure-cons "?"
	  devanagari-pure-cons "?" devanagari-pure-cons "$")
  "Regexp matching a Devanagari consonant syllable.
It matches up to three optional pure consonants followed by one
mandatory pure consonant (see `devanagari-pure-cons'), and ends with
the \"$\" anchor.")
202 | ||
203 | (defconst devanagari-cons-vowel-syllable | |
204 | (concat "\\(" | |
205 | devanagari-pure-cons "?" devanagari-pure-cons "?" | |
206 | devanagari-pure-cons "?" devanagari-pure-cons "\\)?" | |
207 | devanagari-full-cons devanagari-matra "?[\e$(5!!!"\e(B]?") | |
208 | "Devanagari consonant vowel syllable.") | |
4ed46869 KH |
209 | |
210 | ;; | |
211 | ;; Also, digits and virams should be processed other than syllables. | |
212 | ;; | |
213 | ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and | |
214 | ;; OM is obtained by Nukta after Chandrabindu | |
215 | ;; | |
37cdc7ad KH |
216 | |
(defconst devanagari-digit-viram-visarga
  "[\e$(5!q\e(B-\e$(5!z!j!#\e(B]"
  "Regexp matching a single Devanagari digit, viram or visarga character.
These characters are handled separately from syllables.")
219 | ||
(defconst devanagari-other-sign
  "\\([\e$(5!!!j\e(B]\e$(5!i\e(B\\)\\|\\([\e$(5#!#J\e(B]\\)"
  "Regexp matching Devanagari signs that IS 13194 expresses with nukta.
The first alternative matches one of two signs followed by nukta; the
second matches one of two single characters.  According to the
commentary in this file, Avagrah is obtained by nukta after viram and
OM by nukta after chandrabindu.")
4ed46869 | 222 | |
37cdc7ad KH |
(defconst devanagari-composite-glyph-unit
  (concat "\\(" devanagari-cons-syllable
	  "\\)\\|\\(" devanagari-vowel-syllable
	  "\\)\\|\\(" devanagari-cons-vowel-syllable
	  "\\)\\|\\(" devanagari-other-sign
	  "\\)\\|\\(" devanagari-digit-viram-visarga "\\)")
  "Regexp matching a Devanagari string that is composed into one glyph.
It is the alternation, in this order, of `devanagari-cons-syllable',
`devanagari-vowel-syllable', `devanagari-cons-vowel-syllable',
`devanagari-other-sign' and `devanagari-digit-viram-visarga'.")
230 | ||
231 | ;;(put-charset-property charset-devanagari-1-column | |
232 | ;; 'char-to-glyph 'devanagari-compose-string) | |
233 | ;;(put-charset-property charset-devanagari-2-column | |
234 | ;; 'char-to-glyph 'devanagari-compose-string) | |
235 | ||
236 | ;; Sample | |
237 | ;; | |
37cdc7ad KH |
238 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!X![\e(B") => 0 |
239 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!F!h!D!\\e(B") => 0 | |
240 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!X![!F!h!D!\\e(B") => 0 | |
4ed46869 KH |
241 | |
242 | ;; | |
243 | ;; Steps toward the composition | |
37cdc7ad | 244 | ;; Converting Character Codes to Composite Glyph. |
4ed46869 KH |
245 | ;; |
246 | ;; Example : \e$(5!X![\e(B/\e$(5!F!h!D!\\e(B | |
247 | ;; | |
248 | ;; First, convert Characters to appropriate glyphs. | |
249 | ;; | |
250 | ;; => \e$(5!X![\e(B/\e$(5"F!D!\\e(B | |
251 | ;; | |
252 | ;; Then, determine the base glyph, apply-orders and apply-rules. | |
253 | ;; | |
254 | ;; => \e$(5!X\e(B (ml.mr) \e$(5![\e(B / \e$(5!D\e(B (ml.mr) \e$(5"F\e(B (mr ml) \e$(5!\\e(B | |
255 | ;; | |
256 | ;; Finally, convert 2-column glyphs to 1-column glyph | |
257 | ;; if such a glyph exist. | |
258 | ;; | |
259 | ;; => \e$(6![\e(B (ml.mr) \e$(6!X\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B | |
260 | ;; | |
261 | ;; Compose the glyph. | |
262 | ;; | |
27463ede KH |
263 | ;; => \e2\e$(6!X@![\e(B\e1/\e2\e$(6!D@"FP!\\e(B\e1 |
264 | ;; => \e2\e$(6!X@![\e(B\e1\e2\e$(6!D@"FP!\\e(B\e1 | |
4ed46869 KH |
265 | ;; |
266 | ||
267 | ;; | |
268 | ;; Phase 1: Converting Character Code to Glyph Code. | |
269 | ;; | |
270 | ;; | |
271 | ;; IMPORTANT: | |
272 | ;; There may be many rules which you may want to suppress. | |
273 | ;; In that case, please comment out that rule. | |
274 | ;; | |
275 | ;; RULES WILL BE EVALUATED FROM FIRST TO LAST. | |
276 | ;; PUT MORE SPECIFIC RULES FIRST. | |
277 | ;; | |
278 | ;; TO DO: | |
279 | ;; Prepare a specific list of rules for each language | |
280 | ;; which adopts the Devanagari script. | |
281 | ;; | |
282 | ||
4ed46869 KH |
283 | (defconst devanagari-char-to-glyph-rules |
284 | '( | |
4ed46869 KH |
285 | |
286 | ;; `r' at the top of syllable and followed by other consonants. | |
37cdc7ad KH |
287 | ;; ("[^\e$(5!h\e(B]\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p\e(B") |
288 | ("^\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p\e(B") | |
4ed46869 KH |
289 | |
290 | ;; Ligature Rules | |
37cdc7ad KH |
291 | ("\\(\e$(5!3!h!B!h!O!h!M\e(B\\)" "\e$(5$!\e(B" sanskrit) |
292 | ("\\(\e$(5!3!h!B!h!T\e(B\\)" "\e$(5$"\e(B" sanskrit) | |
293 | ("\\(\e$(5!3!h!B!h!M\e(B\\)" "\e$(5$#\e(B" sanskrit) | |
294 | ("\\(\e$(5!3!h!F!h!M\e(B\\)" "\e$(5$$\e(B") | |
295 | ("\\(\e$(5!3!h!O!h!M\e(B\\)" "\e$(5$%\e(B") | |
296 | ("\\(\e$(5!3!h!O\e(B\\)" "\e$(5"#\e(B") ; Post "r" | |
297 | ("\\(\e$(5!3!h!T!h!M\e(B\\)" "\e$(5$&\e(B" sanskrit) | |
298 | ("\\(\e$(5!3!h\e(B\\)\e$(5!3!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"3\e(B") ; Special Half Form | |
299 | ("\\(\e$(5!3!h!3\e(B\\)" "\e$(5$'\e(B") | |
300 | ("\\(\e$(5!3!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"3\e(B") ; Special Rules for "k-tr" | |
301 | ("\\(\e$(5!3!h!B\e(B\\)" "\e$(5$(\e(B") | |
302 | ("\\(\e$(5!3!h!F\e(B\\)" "\e$(5$)\e(B") | |
303 | ("\\(\e$(5!3!h!L\e(B\\)" "\e$(5$*\e(B") | |
304 | ("\\(\e$(5!3!h!M\e(B\\)" "\e$(5$+\e(B") | |
305 | ("\\(\e$(5!3!h!Q\e(B\\)" "\e$(5$,\e(B") | |
306 | ("\\(\e$(5!3!h!T\e(B\\)" "\e$(5$-\e(B") | |
307 | ("\\(\e$(5!3!h!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"l\e(B") ; Half Form | |
308 | ("\\(\e$(5$.!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"l\e(B") ; Half Form | |
309 | ("\\(\e$(5!3!h!V\e(B\\)" "\e$(5$.\e(B") | |
310 | ("\\(\e$(5!3!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"3\e(B") ; Half Form | |
311 | ("\\(\e$(5!3!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"s\e(B") ; Nukta Half Form | |
312 | ("\\(\e$(5!3!i\e(B\\)" "\e$(5#3\e(B") ; Nukta | |
313 | ("\\(\e$(5!4!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"4\e(B") ; Half Form | |
314 | ("\\(\e$(5!4!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"t\e(B") ; Nukta Half Form | |
315 | ("\\(\e$(5!4!i\e(B\\)" "\e$(5#4\e(B") ; Nukta | |
316 | ("\\(\e$(5!5!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"`\e(B") ; Half Form | |
317 | ("\\(\e$(5!5!h!O\e(B\\)" "\e$(5"$\e(B") ; Post "r" | |
318 | ("\\(\e$(5!5!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"5\e(B") ; Half Form | |
319 | ("\\(\e$(5!5!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"u\e(B") ; Nukta Half Form | |
320 | ("\\(\e$(5!5!i\e(B\\)" "\e$(5#5\e(B") ; Nukta | |
321 | ("\\(\e$(5!6!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"a\e(B") ; Half Form | |
322 | ("\\(\e$(5!6!h!F\e(B\\)" "\e$(5$/\e(B") | |
323 | ; Slot | |
324 | ("\\(\e$(5!6!h!O\e(B\\)" "\e$(5!6"q\e(B") ; Post "r" | |
325 | ("\\(\e$(5!6!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"6\e(B") ; Half Form | |
326 | ("\\(\e$(5!7!h!3!h!B!h!M\e(B\\)" "\e$(5$0\e(B" sanskrit) | |
327 | ("\\(\e$(5!7!h!3!h!V!h!T\e(B\\)" "\e$(5$1\e(B" sanskrit) | |
328 | ("\\(\e$(5!7!h!3!h!B\e(B\\)" "\e$(5$2\e(B" sanskrit) | |
329 | ("\\(\e$(5!7!h!3!h!V\e(B\\)" "\e$(5$3\e(B" sanskrit) | |
330 | ("\\(\e$(5!7!h!3!h!O\e(B\\)" "\e$(5$9"q\e(B") ; Special Rule. May be precomposed font needed. | |
331 | ("\\(\e$(5!7!h!6!h!O\e(B\\)" "\e$(5$4\e(B" sanskrit) | |
332 | ("\\(\e$(5!7!h!3!h!M\e(B\\)" "\e$(5$5\e(B" sanskrit) | |
333 | ("\\(\e$(5!7!h!4!h!M\e(B\\)" "\e$(5$6\e(B" sanskrit) | |
334 | ("\\(\e$(5!7!h!5!h!M\e(B\\)" "\e$(5$7\e(B" sanskrit) | |
335 | ("\\(\e$(5!7!h!6!h!M\e(B\\)" "\e$(5$8\e(B" sanskrit) | |
336 | ("\\(\e$(5!7!h!3\e(B\\)" "\e$(5$9\e(B") | |
337 | ("\\(\e$(5!7!h!4\e(B\\)" "\e$(5$:\e(B") | |
338 | ("\\(\e$(5!7!h!5!h!O\e(B\\)" "\e$(5$;"q\e(B") ; Special Rule. May be precomposed font needed. | |
339 | ("\\(\e$(5!7!h!5\e(B\\)" "\e$(5$;\e(B") | |
340 | ("\\(\e$(5!7!h!6\e(B\\)" "\e$(5$<\e(B") | |
341 | ("\\(\e$(5!7!h!7\e(B\\)" "\e$(5$=\e(B") | |
342 | ("\\(\e$(5!7!h!F\e(B\\)" "\e$(5$>\e(B") | |
343 | ("\\(\e$(5!7!h!L\e(B\\)" "\e$(5$?\e(B") | |
344 | ("\\(\e$(5!7!h!M\e(B\\)" "\e$(5$@\e(B") | |
345 | ("\\(\e$(5!8!h\e(B\\)[\e$(5!8!<\e(B]\e$(5!h\e(B" "\e$(5"8\e(B") ; Half Form | |
346 | ("\\(\e$(5!8!h!8\e(B\\)" "\e$(5$A\e(B") | |
347 | ("\\(\e$(5!8!h!<\e(B\\)" "\e$(5$B\e(B") | |
348 | ("\\(\e$(5!8!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"8"q\e(B") ; Half Form Post "r" | |
349 | ("\\(\e$(5!8!h!O\e(B\\)" "\e$(5!8"q\e(B") ; Post "r" | |
350 | ("\\(\e$(5!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"8\e(B") ; Half Form | |
351 | ("\\(\e$(5!9!h!M\e(B\\)" "\e$(5$C\e(B") | |
352 | ("\\(\e$(5!:!h!O\e(B\\)" "\e$(5$D\e(B") | |
353 | ("\\(\e$(5!:!h!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"m\e(B") ; Half Form | |
354 | ("\\(\e$(5!:!h!<\e(B\\)" "\e$(5$E\e(B") | |
355 | ("\\(\e$(5!:!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5":\e(B") ; Half Form | |
356 | ("\\(\e$(5!:!i!h!O\e(B\\)" "\e$(5"!\e(B") ; Nukta Post "r" | |
357 | ("\\(\e$(5!:!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"z\e(B") ; Nukta Half Form | |
358 | ("\\(\e$(5!:!i\e(B\\)" "\e$(5#:\e(B") ; Nukta | |
359 | ("\\(\e$(5!;!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5";\e(B") ; Half Form | |
360 | ("\\(\e$(5!<!h\e(B\\)\e$(5!8!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Special Half Form | |
361 | ("\\(\e$(5!<!h!8\e(B\\)" "\e$(5$F\e(B") | |
362 | ("\\(\e$(5!<!h\e(B\\)\e$(5!:!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Special Half Form | |
363 | ("\\(\e$(5!<!h!:\e(B\\)" "\e$(5$G\e(B") | |
364 | ("\\(\e$(5!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Half Form | |
365 | ("\\(\e$(5!=!h!3\e(B\\)" "\e$(5$H\e(B") | |
366 | ("\\(\e$(5!=!h!=\e(B\\)" "\e$(5$I\e(B") | |
367 | ("\\(\e$(5!=!h!>\e(B\\)" "\e$(5$J\e(B") | |
368 | ("\\(\e$(5!=!h!M\e(B\\)" "\e$(5$K\e(B") | |
369 | ("\\(\e$(5!>!h!M\e(B\\)" "\e$(5$L\e(B") | |
370 | ("\\(\e$(5!?!h!5!h!M\e(B\\)" "\e$(5$M\e(B" sanskrit) | |
371 | ("\\(\e$(5!?!h!6!h!O\e(B\\)" "\e$(5$N\e(B" sanskrit) | |
372 | ("\\(\e$(5!?!h!O!h!M\e(B\\)" "\e$(5$O\e(B") | |
373 | ("\\(\e$(5!?!h!5\e(B\\)" "\e$(5$P\e(B") | |
374 | ("\\(\e$(5!?!h!6\e(B\\)" "\e$(5$Q\e(B") | |
375 | ("\\(\e$(5!?!h!?\e(B\\)" "\e$(5$R\e(B") | |
376 | ("\\(\e$(5!?!h!L\e(B\\)" "\e$(5$S\e(B") | |
377 | ("\\(\e$(5!?!h!M\e(B\\)" "\e$(5$T\e(B") | |
378 | ("\\(\e$(5!?!i\e(B\\)" "\e$(5#?\e(B") ; Nukta | |
379 | ("\\(\e$(5!@!h!M\e(B\\)" "\e$(5$`\e(B") | |
380 | ("\\(\e$(5!@!i\e(B\\)" "\e$(5#@\e(B") ; Nukta | |
381 | ("\\(\e$(5!A!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"A\e(B") ; Half Form | |
382 | ("\\(\e$(5!B!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"B\e(B") ; Special Rule for "t-tr" | |
383 | ("\\(\e$(5!B!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"c\e(B") ; Half Form | |
384 | ("\\(\e$(5!B!h!B\e(B\\)" "\e$(5$a\e(B") | |
385 | ("\\(\e$(5!B!h!F\e(B\\)" "\e$(5$b\e(B") | |
386 | ("\\(\e$(5!B!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"d\e(B") ; Half Form Post "r" | |
387 | ("\\(\e$(5!B!h!O\e(B\\)" "\e$(5"%\e(B") ; Post "r" | |
388 | ("\\(\e$(5!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"B\e(B") ; Half Form | |
389 | ("\\(\e$(5!C!h!O\e(B\\)" "\e$(5!C"q\e(B") ; Post "r" | |
390 | ("\\(\e$(5!C!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"C\e(B") ; Half Form | |
391 | ("\\(\e$(5!D!h!D!h!M\e(B\\)" "\e$(5$c\e(B") | |
392 | ("\\(\e$(5!D!h!E!h!M\e(B\\)" "\e$(5$d\e(B") | |
393 | ("\\(\e$(5!D!h!K!h!M\e(B\\)" "\e$(5$e\e(B") | |
394 | ("\\(\e$(5!D!h!K!h!O\e(B\\)" "\e$(5$r"r\e(B") ; Special Case for "dbhr" ; *** | |
395 | ("\\(\e$(5!D!h!O!h!M\e(B\\)" "\e$(5$f\e(B") | |
396 | ("\\(\e$(5!D!h!T!h!M\e(B\\)" "\e$(5$g\e(B") | |
397 | ("\\(\e$(5!D!h!5!h!O\e(B\\)" "\e$(5$h\e(B") | |
398 | ("\\(\e$(5!D!h!6!h!O\e(B\\)" "\e$(5$i\e(B") | |
399 | ("\\(\e$(5!D!h!D!h!T\e(B\\)" "\e$(5$j\e(B") | |
400 | ("\\(\e$(5!D!h!E!h!T\e(B\\)" "\e$(5$k\e(B") | |
401 | ("\\(\e$(5!D!h\e(B\\)\e$(5!E!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5!D!h\e(B") ; Special Half Form (for ddhra) | |
402 | ("\\(\e$(5!D!h!5\e(B\\)" "\e$(5$l\e(B") | |
403 | ("\\(\e$(5!D!h!6\e(B\\)" "\e$(5$m\e(B") | |
404 | ("\\(\e$(5!D!h!D\e(B\\)" "\e$(5$n\e(B") | |
405 | ("\\(\e$(5!D!h!E\e(B\\)" "\e$(5$o\e(B") | |
406 | ("\\(\e$(5!D!h!F\e(B\\)" "\e$(5$p\e(B") | |
407 | ("\\(\e$(5!D!h\e(B\\)\e$(5!J!h\e(B" "\e$(5!D!h\e(B") ; Suppressing "db-" | |
408 | ("\\(\e$(5!D!h!J\e(B\\)" "\e$(5$q\e(B") | |
409 | ("\\(\e$(5!D!h!K\e(B\\)" "\e$(5$r\e(B") | |
410 | ("\\(\e$(5!D!h!L\e(B\\)" "\e$(5$s\e(B") | |
411 | ("\\(\e$(5!D!h!M\e(B\\)" "\e$(5$t\e(B") | |
412 | ("\\(\e$(5!D!h!T\e(B\\)" "\e$(5$u\e(B") | |
413 | ("\\(\e$(5!E!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"e\e(B") ; Half Form | |
414 | ("\\(\e$(5!E!h!F\e(B\\)" "\e$(5$v\e(B") | |
415 | ("\\(\e$(5!E!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"f\e(B") ; Half Form Post "r" | |
416 | ("\\(\e$(5!E!h!O\e(B\\)" "\e$(5!E"q\e(B") ; Post "r" | |
417 | ("\\(\e$(5!E!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"E\e(B") ; Half Form | |
418 | ("\\(\e$(5!F!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"k\e(B") ; Half Form | |
419 | ("\\(\e$(5!F!h!F\e(B\\)" "\e$(5$w\e(B") | |
420 | ("\\(\e$(5!F!h!O\e(B\\)" "\e$(5!F"q\e(B") | |
421 | ("\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"F\e(B") ; Half Form | |
422 | ("\\(\e$(5!G!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"G\e(B") ; Nukta Half Form | |
423 | ("\\(\e$(5!H!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"H\e(B") ; Special Rule for "p-tr" | |
424 | ("\\(\e$(5!H!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"g\e(B") ; Half Form | |
425 | ("\\(\e$(5!H!h!B\e(B\\)" "\e$(5$x\e(B") | |
426 | ("\\(\e$(5!H!h!F\e(B\\)" "\e$(5$y\e(B") | |
427 | ("\\(\e$(5!H!h!Q\e(B\\)" "\e$(5$z\e(B") | |
428 | ("\\(\e$(5!H!h!O\e(B\\)" "\e$(5"&\e(B") ; Post "r" | |
429 | ("\\(\e$(5!H!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"H\e(B") ; Half Form | |
430 | ("\\(\e$(5!I!h!O\e(B\\)" "\e$(5"'\e(B") ; Post "r" | |
431 | ("\\(\e$(5!I!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"I\e(B") ; Half Form | |
432 | ("\\(\e$(5!I!i!h!O\e(B\\)" "\e$(5""\e(B") ; Nukta Post "r" | |
433 | ("\\(\e$(5!I!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"y\e(B") ; Nukta Half Form | |
434 | ("\\(\e$(5!I!i\e(B\\)" "\e$(5#I\e(B") ; Nukta | |
435 | ("\\(\e$(5!J!h\e(B\\)\e$(5!F!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
436 | ("\\(\e$(5!J!h!F\e(B\\)" "\e$(5${\e(B") | |
437 | ("\\(\e$(5!J!h\e(B\\)\e$(5!J!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
438 | ("\\(\e$(5!J!h!J\e(B\\)" "\e$(5$|\e(B") | |
439 | ("\\(\e$(5!J!h\e(B\\)\e$(5!T!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
440 | ("\\(\e$(5!J!h!T\e(B\\)" "\e$(5$}\e(B") | |
441 | ("\\(\e$(5!J!h!O\e(B\\)" "\e$(5!J"q\e(B") ; Post "r" | |
442 | ("\\(\e$(5!J!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Half Form | |
443 | ("\\(\e$(5!K!h!F\e(B\\)" "\e$(5$~\e(B") | |
444 | ("\\(\e$(5!K!h!O\e(B\\)" "\e$(5!K"q\e(B") ; Post "r" | |
445 | ("\\(\e$(5!K!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"K\e(B") ; Half Form | |
446 | ("\\(\e$(5!L!h!F\e(B\\)" "\e$(5#P\e(B") | |
447 | ("\\(\e$(5!L!h!Q\e(B\\)" "\e$(5#Q\e(B") | |
448 | ("\\(\e$(5!L!h!O\e(B\\)" "\e$(5!L"q\e(B") ; Post "r" | |
449 | ("\\(\e$(5!L!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"L\e(B") ; Half Form | |
450 | ("\\(\e$(5!M!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"M\e(B") ; Half Form | |
451 | ("\\(\e$(5!N!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"N\e(B") ; Half Form | |
452 | ;; special form for "ru". | |
453 | ("\\(\e$(5!O!]\e(B\\)" "\e$(5",\e(B") | |
454 | ("\\(\e$(5!O!^\e(B\\)" "\e$(5"-\e(B") | |
455 | ("\\(\e$(5!P!]\e(B\\)" "\e$(5".\e(B") | |
456 | ("\\(\e$(5!P!^\e(B\\)" "\e$(5"/\e(B") | |
4ed46869 | 457 | ;; |
37cdc7ad KH |
458 | ("\\(\e$(5!Q!h!Q\e(B\\)" "\e$(5#`\e(B" sanskrit) |
459 | ("\\(\e$(5!Q!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"Q\e(B") ; Half Form | |
460 | ("\\(\e$(5!R!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"R\e(B") ; Half Form | |
461 | ("\\(\e$(5!S!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"S\e(B") ; Half Form | |
462 | ("\\(\e$(5!T!h!F\e(B\\)" "\e$(5#a\e(B") | |
463 | ("\\(\e$(5!T!h!T\e(B\\)" "\e$(5#b\e(B") | |
464 | ("\\(\e$(5!T!h!O\e(B\\)" "\e$(5!T"q\e(B") ; Post "r" | |
465 | ("\\(\e$(5!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"T\e(B") ; Half Form | |
466 | ("\\(\e$(5!U!h!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"h\e(B") ; Half Form | |
467 | ("\\(\e$(5!U!h!8\e(B\\)" "\e$(5#c\e(B") | |
468 | ("\\(\e$(5!U!h!F\e(B\\)" "\e$(5#d\e(B") | |
469 | ("\\(\e$(5!U!h!J\e(B\\)" "\e$(5#e\e(B") | |
470 | ("\\(\e$(5!U!h!Q\e(B\\)" "\e$(5#f\e(B") | |
471 | ("\\(\e$(5!U!h\e(B\\)\e$(5!T!h!O\e(B" "\e$(5"U\e(B") ; Special Half Form | |
472 | ("\\(\e$(5!U!h!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"j\e(B") ; Half Form | |
473 | ; ("\\(\e$(5!U!h!T\e(B\\)" "\e$(5#g\e(B") | |
474 | ("\\(\e$(5!U!h!O!h!T\e(B\\)" "\e$(5#g\e(B") | |
475 | ("\\(\e$(5!U!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"i\e(B") ; Half Form | |
476 | ("\\(\e$(5!U!h!O\e(B\\)" "\e$(5")\e(B") ; Post "r" | |
477 | ("\\(\e$(5!U!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"U\e(B") ; Half Form | |
478 | ("\\(\e$(5!V!h!=!h!O!h!M\e(B\\)" "\e$(5#h\e(B") | |
479 | ("\\(\e$(5!V!h!=!h!M\e(B\\)" "\e$(5#i\e(B") | |
480 | ("\\(\e$(5!V!h!=!h!T\e(B\\)" "\e$(5#j\e(B") | |
481 | ("\\(\e$(5!V!h!=\e(B\\)" "\e$(5#k\e(B") | |
482 | ("\\(\e$(5!V!h!>\e(B\\)" "\e$(5#l\e(B") | |
483 | ("\\(\e$(5!V!h!O\e(B\\)" "\e$(5!V"q\e(B") ; Post "r" | |
484 | ("\\(\e$(5!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"V\e(B") ; Half Form | |
485 | ("\\(\e$(5!W!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"W"F\e(B") ; Special Half Form | |
486 | ("\\(\e$(5!W!h!F\e(B\\)" "\e$(5#m\e(B") | |
487 | ("\\(\e$(5!W!h!O\e(B\\)" "\e$(5#n\e(B") | |
488 | ("\\(\e$(5!W!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"W\e(B") ; Half Form | |
489 | ("\\(\e$(5!X!h!A\e(B\\)" "\e$(5#p\e(B") | |
490 | ("\\(\e$(5!X!h!F\e(B\\)" "\e$(5#q\e(B") | |
491 | ("\\(\e$(5!X!h!L\e(B\\)" "\e$(5#r\e(B") | |
492 | ("\\(\e$(5!X!h!M\e(B\\)" "\e$(5#s\e(B") | |
493 | ("\\(\e$(5!X!h!O\e(B\\)" "\e$(5#t\e(B") | |
494 | ("\\(\e$(5!X!h!Q\e(B\\)" "\e$(5#u\e(B") | |
495 | ("\\(\e$(5!X!h!T\e(B\\)" "\e$(5#v\e(B") | |
496 | ;; Special Ligature Rules | |
497 | ("\\(\e$(5!X!_\e(B\\)" "\e$(5#R\e(B") | |
4ed46869 KH |
498 | |
499 | ;; For consonants other than listed above, glyph-composition will | |
500 | ;; be applied. If the consonant which is preceding "\e$(5!O\e(B" does not | |
501 | ;; have the vertical line (such as "\e$(5!?\e(B"), "\e$(5"r\e(B" is put beneath the | |
502 | ;; consonant. | |
503 | ;; | |
37cdc7ad KH |
504 | ("[\e$(5!7!9!=!>!?!@!D!O!P!R!S!X\e(B]\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") |
505 | ("[\e$(5!6!8!C!E!F!H!J!K!L!M!T!V\e(B]\\(\e$(5!h!O\e(B\\)" "\e$(5"q\e(B") | |
506 | ("\e$(5!?!i\e(B\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") | |
507 | ("\e$(5!@!i\e(B\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") | |
508 | ||
509 | ;; Nukta with Non-Consonants | |
510 | ("\\(\e$(5!!!i\e(B\\)" "\e$(5#!\e(B") | |
511 | ("\\(\e$(5!&!i\e(B\\)" "\e$(5#&\e(B") | |
512 | ("\\(\e$(5!'!i\e(B\\)" "\e$(5#'\e(B") | |
513 | ("\\(\e$(5!*!i\e(B\\)" "\e$(5#*\e(B") | |
514 | ("\\(\e$(5![!i\e(B\\)" "\e$(5#L\e(B") | |
515 | ("\\(\e$(5!\!i\e(B\\)" "\e$(5#M\e(B") | |
516 | ("\\(\e$(5!_!i\e(B\\)" "\e$(5#K\e(B") | |
517 | ("\\(\e$(5!j!i\e(B\\)" "\e$(5#J\e(B") | |
518 | ||
519 | ;; Special rule for "r + some vowels" | |
bb6c9254 KH |
520 | ("\\(\e$(5!O!_!i\e(B\\)" "\e$(5#*"p\e(B") |
521 | ("\\(\e$(5!O![!i\e(B\\)" "\e$(5#&"p\e(B") | |
522 | ("\\(\e$(5!O!\!i\e(B\\)" "\e$(5#'"p\e(B") | |
37cdc7ad | 523 | ("\\(\e$(5!O!_\e(B\\)" "\e$(5!*"p\e(B") |
bd09f27b | 524 | ;; If everything fails, "y" will connect to the front consonant. |
37cdc7ad | 525 | ("\\(\e$(5!h!M\e(B\\)" "\e$(5"]\e(B") |
4ed46869 KH |
526 | ) |
527 | "Alist of regexps of Devanagari character sequences vs composed characters.") | |
528 | ||
4ed46869 KH |
;; Index `devanagari-char-to-glyph-rules' through char-code properties:
;;  - the `char-to-glyph' property of the first character inside each
;;    rule's \(...\) group collects the rules for that character, in
;;    table order;
;;  - the `glyph-to-char' property of the first character of each
;;    replacement string records a (GLYPH-STRING CHARS) entry.
(let ((rules devanagari-char-to-glyph-rules))
  (while rules
    (let ((rule (car rules))
	  (chars) (char) (glyphs) (glyph))
      (setq rules (cdr rules))
      ;; CHARS is the text between the first \( and the last \) of the
      ;; rule's regexp.
      ;; NOTE(review): the result of `string-match' is not checked; this
      ;; assumes every rule contains a \(...\) group -- confirm before
      ;; adding rules without one.
      (string-match "\\\\(\\(.+\\)\\\\)" (car rule))
      (setq chars (substring (car rule) (match-beginning 1) (match-end 1)))
      ;; CHAR is the first character of CHARS; it is the key under which
      ;; the rule is stored.
      (setq char (string-to-char chars))
      ;; GLYPHS is the rule without its regexp: the replacement string
      ;; followed by any specs.  GLYPH is the replacement's first character.
      (setq glyphs (cdr rule))
      (setq glyph (string-to-char (car glyphs)))
      (put-char-code-property
       char 'char-to-glyph
       ;; We don't "cons" it since priority is top to down.
       (append (get-char-code-property char 'char-to-glyph) (list rule)))

      ;; Register the reverse mapping only for codes above the threshold
      ;; character, and only the first time a given glyph is seen.
      (if (and (< ?\e(5z\e(B glyph) ; Glyphs only.
	       (null (get-char-code-property glyph 'glyph-to-char)))
	  ; One glyph may corresponds to multiple characters,
	  ; e.g., surrounding vowel in Tamil, etc.
	  ; but for Devanagari, we put this restriction
	  ; to make sure the fact that one glyph corresponds to one char.
	  (put-char-code-property
	   glyph 'glyph-to-char
	   (cons (list (car glyphs) chars)
		 (get-char-code-property glyph 'glyph-to-char)
		 ))))))
4ed46869 KH |
555 | |
556 | ;; | |
37cdc7ad KH |
557 | ;; Function used in both characters-to-glyphs conversion and |
558 | ;; glyphs-to-characters conversion. | |
4ed46869 KH |
559 | ;; |
560 | ||
(defun max-match-len (regexp-str)
  "Return an estimate of the length of a string matched by REGEXP-STR.
Only bracket expressions \"[...]\" are recognized: each one is reduced
to the last character it contains, so it counts as one character.
Every other character of REGEXP-STR is counted literally."
  (let ((reduced regexp-str))
    ;; Collapse one bracket expression per iteration, always rescanning
    ;; from the start of the partially reduced string.
    (while (string-match "\\[\\([^\]]\\)+\\]" reduced)
      (setq reduced (replace-match (match-string 1 reduced) t t reduced)))
    (length reduced)))
572 | ||
27463ede KH |
(defun rule-intersection (list1 list2)
  "Return t if some element of LIST1 is also a member of LIST2, else nil.
Membership is tested with `memq'."
  (catch 'shared
    (dolist (item list1)
      (when (memq item list2)
        (throw 'shared t)))
    nil))
581 | ||
(defun string-conversion-by-rule (src-str symbol &rest specs)
  "Convert SRC-STR to a new string using each character's SYMBOL property.
The SYMBOL char-code property of a character holds a list of rules of
the form (REGEXP STR SPEC...).  At each position of SRC-STR, the rules
of the character found there are tried in list order, so more specific
rules should be placed before less specific ones.  The first rule that
matches at that position has its matched text replaced by STR.  A
character for which no rule applies is copied unchanged.

A rule without SPECs is always tried.  A rule with SPECs is tried only
when one of the given SPECS is `eq' to one of the rule's SPECs.

If REGEXP has the form '...\\(...\\)...', the text matched by the
first group is the subject of the replacement and must start at the
current position; whatever precedes the group is left context.  The
length of that context is estimated with `max-match-len'.  Otherwise
the entire expression is the subject of the match.

No composite character is supported; such characters must be converted
by decompose-char before being passed to this function."
  (let ((len (length src-str))
        (pos 0)
        ;; Output pieces, most recent first; joined once at the end so
        ;; that the result is not re-copied for every character.
        (pieces nil))
    (while (< pos len)
      (let ((rules (get-char-code-property (aref src-str pos) symbol))
            (found nil))
        (while (and rules (not found))
          (let* ((rule (car rules))
                 (regexp (car rule))
                 (replace-str (nth 1 rule))
                 (rule-specs (nthcdr 2 rule))
                 (search-pos pos))
            (when (or (null rule-specs)
                      (rule-intersection specs rule-specs))
              (if (string-match "\\\\(.+\\\\)" regexp)
                  ;; Start far enough to the left for the context that
                  ;; precedes the group (the text before the first
                  ;; backslash) to be matched too.
                  (setq search-pos
                        (max 0
                             (- pos
                                (max-match-len
                                 (substring regexp 0
                                            (string-match "\\\\" regexp))))))
                ;; No explicit group: the whole regexp is the subject.
                (setq regexp (concat "\\(" regexp "\\)")))
              (when (and (string-match regexp src-str search-pos)
                         ;; `eql' tolerates a group that took no part
                         ;; in the match (its position is then nil).
                         (eql (match-beginning 1) pos)
                         ;; Refuse empty matches: they would leave POS
                         ;; unchanged and the outer loop would never end.
                         (> (match-end 1) pos))
                (push replace-str pieces)
                ;; Skip over the characters that were replaced.
                (setq pos (match-end 1)
                      found t)))
            (setq rules (cdr rules))))
        ;; No rule applied here: copy one character and move on.
        (unless found
          (push (substring src-str pos (1+ pos)) pieces)
          (setq pos (1+ pos)))))
    (apply #'concat (nreverse pieces))))
637 | ||
37cdc7ad KH |
638 | |
639 | ;; | |
640 | ;; Convert Character Code to Glyph Code | |
641 | ;; | |
642 | ||
;;;###autoload
(defun char-to-glyph-devanagari (src-str &rest langs)
  "Convert the Devanagari characters in SRC-STR to Devanagari glyphs.
Ligatures and special rules are processed.  The work is done by
`string-conversion-by-rule' with the `char-to-glyph' property, and
LANGS is passed along as its rule specs."
  (apply 'string-conversion-by-rule src-str 'char-to-glyph langs))
650 | ||
4ed46869 KH |
651 | ;; Example: |
652 | ;;(char-to-glyph-devanagari "\e$(5!X![!F!h!D!\\e(B") => "\e$(5!X!["F!D!\\e(B" | |
653 | ;;(char-to-glyph-devanagari "\e$(5!O!Z!V!h!=!h!O![!M\e(B") => ??? | |
654 | ||
655 | ;; | |
656 | ;; Phase 2: Compose Glyphs to form One Glyph. | |
657 | ;; | |
658 | ||
;; Each entry is a list of a glyph, an application-priority and an
;; application-direction.
;;
;; Glyphs will be ordered from low priority number to high priority number.
;; If application-priority is omitted, it is assumed to be 0.
;; If application-direction is omitted, it is assumed to be '(mr . ml).
;;
;; Entries are looked up by glyph with `assq'.  The entries of the form
;; (nil 0) can never be found that way; presumably they are
;; placeholders for code points that have no glyph.

(defconst devanagari-composition-rules
  '((?\e$(5!!\e(B 0 (tr . br))
    (?\e$(5!"\e(B 0 (mr . mr))
    (?\e$(5!#\e(B 0)
    (?\e$(5!$\e(B 0)
    (?\e$(5!%\e(B 0)
    (?\e$(5!&\e(B 0)
    (?\e$(5!'\e(B 0)
    (?\e$(5!(\e(B 0)
    (?\e$(5!)\e(B 0)
    (?\e$(5!*\e(B 0)
    (?\e$(5!+\e(B 0)
    (?\e$(5!,\e(B 0)
    (?\e$(5!-\e(B 0)
    (?\e$(5!.\e(B 0)
    (?\e$(5!/\e(B 0)
    (?\e$(5!0\e(B 0)
    (?\e$(5!1\e(B 0)
    (?\e$(5!2\e(B 0)
    (?\e$(5!3\e(B 0)
    (?\e$(5!4\e(B 0)
    (?\e$(5!5\e(B 0)
    (?\e$(5!6\e(B 0)
    (?\e$(5!7\e(B 0)
    (?\e$(5!8\e(B 0)
    (?\e$(5!9\e(B 0)
    (?\e$(5!:\e(B 0)
    (?\e$(5!;\e(B 0)
    (?\e$(5!<\e(B 0)
    (?\e$(5!=\e(B 0)
    (?\e$(5!>\e(B 0)
    (?\e$(5!?\e(B 0)
    (?\e$(5!@\e(B 0)
    (?\e$(5!A\e(B 0)
    (?\e$(5!B\e(B 0)
    (?\e$(5!C\e(B 0)
    (?\e$(5!D\e(B 0)
    (?\e$(5!E\e(B 0)
    (?\e$(5!F\e(B 0)
    (?\e$(5!G\e(B 0)
    (?\e$(5!H\e(B 0)
    (?\e$(5!I\e(B 0)
    (?\e$(5!J\e(B 0)
    (?\e$(5!K\e(B 0)
    (?\e$(5!L\e(B 0)
    (?\e$(5!M\e(B 0)
    (?\e$(5!N\e(B 0)
    (?\e$(5!O\e(B 0)
    (?\e$(5!P\e(B 0)
    (?\e$(5!Q\e(B 0)
    (?\e$(5!R\e(B 0)
    (?\e$(5!S\e(B 0)
    (?\e$(5!T\e(B 0)
    (?\e$(5!U\e(B 0)
    (?\e$(5!V\e(B 0)
    (?\e$(5!W\e(B 0)
    (?\e$(5!X\e(B 0)
    (?\e$(5!Y\e(B 0)
    (?\e$(5!Z\e(B 0)
    (?\e$(5![\e(B 0 (ml . mr))
    (?\e$(5!\\e(B 0)
    (?\e$(5!]\e(B 0 (br . tr))
    (?\e$(5!^\e(B 0 (br . tr))
    (?\e$(5!_\e(B 0 (br . tr))
    (?\e$(5!`\e(B 0 (mr . mr)) ; (tc . bc)
    (?\e$(5!a\e(B 0 (mr . mr))
    (?\e$(5!b\e(B 0 (mr . mr))
    (?\e$(5!c\e(B 0 (mr . mr))
    (?\e$(5!d\e(B 0)
    (?\e$(5!e\e(B 0)
    (?\e$(5!f\e(B 0)
    (?\e$(5!g\e(B 0)
    (?\e$(5!h\e(B 0 (br . tr))
    (?\e$(5!i\e(B 0 (br . tr))
    (?\e$(5!j\e(B 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (?\e$(5!q\e(B 0)
    (?\e$(5!r\e(B 0)
    (?\e$(5!s\e(B 0)
    (?\e$(5!t\e(B 0)
    (?\e$(5!u\e(B 0)
    (?\e$(5!v\e(B 0)
    (?\e$(5!w\e(B 0)
    (?\e$(5!x\e(B 0)
    (?\e$(5!y\e(B 0)
    (?\e$(5!z\e(B 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (?\e$(5"!\e(B 0)
    (?\e$(5""\e(B 0)
    (?\e$(5"#\e(B 0)
    (?\e$(5"$\e(B 0)
    (?\e$(5"%\e(B 0)
    (?\e$(5"&\e(B 0)
    (?\e$(5"'\e(B 0)
    (?\e$(5"(\e(B 0)
    (?\e$(5")\e(B 0)
    (?\e$(5"*\e(B 0)
    (?\e$(5"+\e(B 0)
    (?\e$(5",\e(B 0)
    (?\e$(5"-\e(B 0)
    (?\e$(5".\e(B 0)
    (?\e$(5"/\e(B 0)
    (?\e$(5"0\e(B 0)
    (?\e$(5"1\e(B 0)
    (?\e$(5"2\e(B 0)
    (?\e$(5"3\e(B 0)
    (?\e$(5"4\e(B 0)
    (?\e$(5"5\e(B 0)
    (?\e$(5"6\e(B 0)
    (?\e$(5"7\e(B 0)
    (?\e$(5"8\e(B 0)
    (?\e$(5"9\e(B 0)
    (?\e$(5":\e(B 0)
    (?\e$(5";\e(B 0)
    (?\e$(5"<\e(B 0)
    (?\e$(5"=\e(B 0)
    (?\e$(5">\e(B 0)
    (?\e$(5"?\e(B 0)
    (?\e$(5"@\e(B 0)
    (?\e$(5"A\e(B 0)
    (?\e$(5"B\e(B 0)
    (?\e$(5"C\e(B 0)
    (?\e$(5"D\e(B 0)
    (?\e$(5"E\e(B 0)
    (?\e$(5"F\e(B 0)
    (?\e$(5"G\e(B 0)
    (?\e$(5"H\e(B 0)
    (?\e$(5"I\e(B 0)
    (?\e$(5"J\e(B 0)
    (?\e$(5"K\e(B 0)
    (?\e$(5"L\e(B 0)
    (?\e$(5"M\e(B 0)
    (?\e$(5"N\e(B 0)
    (?\e$(5"O\e(B 0)
    (?\e$(5"P\e(B 0)
    (?\e$(5"Q\e(B 0)
    (?\e$(5"R\e(B 0)
    (?\e$(5"S\e(B 0)
    (?\e$(5"T\e(B 0)
    (?\e$(5"U\e(B 0)
    (?\e$(5"V\e(B 0)
    (?\e$(5"W\e(B 0)
    (?\e$(5"X\e(B 0)
    (?\e$(5"Y\e(B 0)
    (?\e$(5"Z\e(B 0)
    (?\e$(5"[\e(B 0)
    (?\e$(5"\\e(B 0)
    (?\e$(5"]\e(B 0)
    (?\e$(5"^\e(B 0)
    (?\e$(5"_\e(B 0)
    (?\e$(5"`\e(B 0)
    (?\e$(5"a\e(B 0)
    (?\e$(5"b\e(B 0)
    (?\e$(5"c\e(B 0)
    (?\e$(5"d\e(B 0)
    (?\e$(5"e\e(B 0)
    (?\e$(5"f\e(B 0)
    (?\e$(5"g\e(B 0)
    (?\e$(5"h\e(B 0)
    (?\e$(5"i\e(B 0)
    (?\e$(5"j\e(B 0)
    (?\e$(5"k\e(B 0)
    (?\e$(5"l\e(B 0)
    (?\e$(5"m\e(B 0)
    (?\e$(5"n\e(B 0)
    (?\e$(5"o\e(B 0)
    (?\e$(5"p\e(B 10 (mr . mr))
    (?\e$(5"q\e(B 0 (br . br))
    (?\e$(5"r\e(B 0 (br . tr))
    (?\e$(5"s\e(B 0)
    (?\e$(5"t\e(B 0)
    (?\e$(5"u\e(B 0)
    (?\e$(5"v\e(B 0)
    (?\e$(5"w\e(B 0)
    (?\e$(5"x\e(B 0)
    (?\e$(5"y\e(B 0)
    (?\e$(5"z\e(B 0)
    (?\e$(5"{\e(B 0)
    (?\e$(5"|\e(B 0)
    (?\e$(5"}\e(B 0)
    (?\e$(5"~\e(B 0)
    (?\e$(5#!\e(B 0)
    (?\e$(5#"\e(B 0)
    (?\e$(5##\e(B 0)
    (?\e$(5#$\e(B 0)
    (?\e$(5#%\e(B 0)
    (?\e$(5#&\e(B 0)
    (?\e$(5#'\e(B 0)
    (?\e$(5#(\e(B 0)
    (?\e$(5#)\e(B 0)
    (?\e$(5#*\e(B 0)
    (?\e$(5#+\e(B 0)
    (?\e$(5#,\e(B 0)
    (?\e$(5#-\e(B 0)
    (?\e$(5#.\e(B 0)
    (?\e$(5#/\e(B 0)
    (?\e$(5#0\e(B 0)
    (?\e$(5#1\e(B 0)
    (?\e$(5#2\e(B 0)
    (?\e$(5#3\e(B 0)
    (?\e$(5#4\e(B 0)
    (?\e$(5#5\e(B 0)
    (?\e$(5#6\e(B 0)
    (?\e$(5#7\e(B 0)
    (?\e$(5#8\e(B 0)
    (?\e$(5#9\e(B 0)
    (?\e$(5#:\e(B 0)
    (?\e$(5#;\e(B 0)
    (?\e$(5#<\e(B 0)
    (?\e$(5#=\e(B 0)
    (?\e$(5#>\e(B 0)
    (?\e$(5#?\e(B 0)
    (?\e$(5#@\e(B 0)
    (?\e$(5#A\e(B 0)
    (?\e$(5#B\e(B 0)
    (?\e$(5#C\e(B 0)
    (?\e$(5#D\e(B 0)
    (?\e$(5#E\e(B 0)
    (?\e$(5#F\e(B 0)
    (?\e$(5#G\e(B 0)
    (?\e$(5#H\e(B 0)
    (?\e$(5#I\e(B 0)
    (?\e$(5#J\e(B 0)
    (?\e$(5#K\e(B 0 (br . tr))
    (?\e$(5#L\e(B 0 (br . tr))
    (?\e$(5#M\e(B 0 (br . tr))
    (?\e$(5#N\e(B 0)
    (?\e$(5#O\e(B 0)
    (?\e$(5#P\e(B 0)
    (?\e$(5#Q\e(B 0)
    (?\e$(5#R\e(B 0)
    (?\e$(5#S\e(B 0)
    (?\e$(5#T\e(B 0)
    (?\e$(5#U\e(B 0)
    (?\e$(5#V\e(B 0)
    (?\e$(5#W\e(B 0)
    (?\e$(5#X\e(B 0)
    (?\e$(5#Y\e(B 0)
    (?\e$(5#Z\e(B 0)
    (?\e$(5#[\e(B 0)
    (?\e$(5#\\e(B 0)
    (?\e$(5#]\e(B 0)
    (?\e$(5#^\e(B 0)
    (?\e$(5#_\e(B 0)
    (?\e$(5#`\e(B 0)
    (?\e$(5#a\e(B 0)
    (?\e$(5#b\e(B 0)
    (?\e$(5#c\e(B 0)
    (?\e$(5#d\e(B 0)
    (?\e$(5#e\e(B 0)
    (?\e$(5#f\e(B 0)
    (?\e$(5#g\e(B 0)
    (?\e$(5#h\e(B 0)
    (?\e$(5#i\e(B 0)
    (?\e$(5#j\e(B 0)
    (?\e$(5#k\e(B 0)
    (?\e$(5#l\e(B 0)
    (?\e$(5#m\e(B 0)
    (?\e$(5#n\e(B 0)
    (?\e$(5#o\e(B 0)
    (?\e$(5#p\e(B 0)
    (?\e$(5#q\e(B 0)
    (?\e$(5#r\e(B 0)
    (?\e$(5#s\e(B 0)
    (?\e$(5#t\e(B 0)
    (?\e$(5#u\e(B 0)
    (?\e$(5#v\e(B 0)
    (?\e$(5#w\e(B 0)
    (?\e$(5#x\e(B 0)
    (?\e$(5#y\e(B 0)
    (?\e$(5#z\e(B 0)
    (?\e$(5#{\e(B 0)
    (?\e$(5#|\e(B 0)
    (?\e$(5#}\e(B 0)
    (?\e$(5#~\e(B 0)
    (?\e$(5$!\e(B 0)
    (?\e$(5$"\e(B 0)
    (?\e$(5$#\e(B 0)
    (?\e$(5$$\e(B 0)
    (?\e$(5$%\e(B 0)
    (?\e$(5$&\e(B 0)
    (?\e$(5$'\e(B 0)
    (?\e$(5$(\e(B 0)
    (?\e$(5$)\e(B 0)
    (?\e$(5$*\e(B 0)
    (?\e$(5$+\e(B 0)
    (?\e$(5$,\e(B 0)
    (?\e$(5$-\e(B 0)
    (?\e$(5$.\e(B 0)
    (?\e$(5$/\e(B 0)
    (?\e$(5$0\e(B 0)
    (?\e$(5$1\e(B 0)
    (?\e$(5$2\e(B 0)
    (?\e$(5$3\e(B 0)
    (?\e$(5$4\e(B 0)
    (?\e$(5$5\e(B 0)
    (?\e$(5$6\e(B 0)
    (?\e$(5$7\e(B 0)
    (?\e$(5$8\e(B 0)
    (?\e$(5$9\e(B 0)
    (?\e$(5$:\e(B 0)
    (?\e$(5$;\e(B 0)
    (?\e$(5$<\e(B 0)
    (?\e$(5$=\e(B 0)
    (?\e$(5$>\e(B 0)
    (?\e$(5$?\e(B 0)
    (?\e$(5$@\e(B 0)
    (?\e$(5$A\e(B 0)
    (?\e$(5$B\e(B 0)
    (?\e$(5$C\e(B 0)
    (?\e$(5$D\e(B 0)
    (?\e$(5$E\e(B 0)
    (?\e$(5$F\e(B 0)
    (?\e$(5$G\e(B 0)
    (?\e$(5$H\e(B 0)
    (?\e$(5$I\e(B 0)
    (?\e$(5$J\e(B 0)
    (?\e$(5$K\e(B 0)
    (?\e$(5$L\e(B 0)
    (?\e$(5$M\e(B 0)
    (?\e$(5$N\e(B 0)
    (?\e$(5$O\e(B 0)
    (?\e$(5$P\e(B 0)
    (?\e$(5$Q\e(B 0)
    (?\e$(5$R\e(B 0)
    (?\e$(5$S\e(B 0)
    (?\e$(5$T\e(B 0)
    (?\e$(5$U\e(B 0)
    (?\e$(5$V\e(B 0)
    (?\e$(5$W\e(B 0)
    (?\e$(5$X\e(B 0)
    (?\e$(5$Y\e(B 0)
    (?\e$(5$Z\e(B 0)
    (?\e$(5$[\e(B 0)
    (?\e$(5$\\e(B 0)
    (?\e$(5$]\e(B 0)
    (?\e$(5$^\e(B 0)
    (?\e$(5$_\e(B 0)
    (?\e$(5$`\e(B 0)
    (?\e$(5$a\e(B 0)
    (?\e$(5$b\e(B 0)
    (?\e$(5$c\e(B 0)
    (?\e$(5$d\e(B 0)
    (?\e$(5$e\e(B 0)
    (?\e$(5$f\e(B 0)
    (?\e$(5$g\e(B 0)
    (?\e$(5$h\e(B 0)
    (?\e$(5$i\e(B 0)
    (?\e$(5$j\e(B 0)
    (?\e$(5$k\e(B 0)
    (?\e$(5$l\e(B 0)
    (?\e$(5$m\e(B 0)
    (?\e$(5$n\e(B 0)
    (?\e$(5$o\e(B 0)
    (?\e$(5$p\e(B 0)
    (?\e$(5$q\e(B 0)
    (?\e$(5$r\e(B 0)
    (?\e$(5$s\e(B 0)
    (?\e$(5$t\e(B 0)
    (?\e$(5$u\e(B 0)
    (?\e$(5$v\e(B 0)
    (?\e$(5$w\e(B 0)
    (?\e$(5$x\e(B 0)
    (?\e$(5$y\e(B 0)
    (?\e$(5$z\e(B 0)
    (?\e$(5${\e(B 0)
    (?\e$(5$|\e(B 0)
    (?\e$(5$}\e(B 0)
    (?\e$(5$~\e(B 0)
    ))
1043 | ||
1044 | ;; Determine composition priority and rule of the array of Glyphs. | |
1045 | ;; Sort the glyphs with their priority. | |
1046 | ||
(defun devanagari-reorder-glyphs-for-composition (glyph-alist)
  "Return the composition rule entries for GLYPH-ALIST, sorted by priority.
GLYPH-ALIST is a sequence of glyph codes that can be indexed with
`aref'.  Each glyph is replaced by its entry in
`devanagari-composition-rules', and the entries are then sorted by
ascending priority (the second element of an entry).

A glyph that has no entry in the table gets the default entry
\(GLYPH 0), so that it is kept in the result and the priority
comparison never sees a missing entry."
  (let ((pos 0)
        (count (length glyph-alist))
        (entries nil))
    ;; Collect in reverse with `cons' and restore the order below,
    ;; instead of copying the whole list with `append' for every glyph.
    (while (< pos count)
      (let ((glyph (aref glyph-alist pos)))
        (setq entries (cons (or (assq glyph devanagari-composition-rules)
                                (list glyph 0))
                            entries)
              pos (1+ pos))))
    (sort (nreverse entries)
          (function (lambda (x y) (< (car (cdr x)) (car (cdr y))))))))
1056 | ||
27463ede | 1057 | ;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e2\e$(6!XP"5@![\e(B\e1" |
4ed46869 KH |
1058 | |
(defun devanagari-compose-to-one-glyph (devanagari-string)
  "Compose the glyphs of DEVANAGARI-STRING into a single glyph.
The glyphs are first ordered by
`devanagari-reorder-glyphs-for-composition'.  The first glyph is the
base; every following glyph is preceded by its composition rule.
The resulting list is passed through `devanagari-wide-to-narrow' and
then handed to `compose-chars'.  If the list holds only one glyph,
that glyph is returned as a one-character string instead."
  (let* ((ordered (devanagari-reorder-glyphs-for-composition
                   (string-to-vector devanagari-string)))
         ;; Arguments for `compose-chars', starting with the base glyph.
         (components (list (car (car ordered)))))
    (setq ordered (cdr ordered))
    (while ordered
      (let ((entry (car ordered)))
        (setq components
              (append components
                      (list (if (< 2 (length entry))
                                ;; The entry carries its own rule.
                                (car (cdr (cdr entry)))
                              ;; No rule given: use the default one.
                              '(mr . ml))
                            (car entry)))
              ordered (cdr ordered))))
    ;; Before applying compose-chars, convert glyphs to
    ;; 1-column width if possible.
    (setq components (devanagari-wide-to-narrow components))
    (if (= (length components) 1)
        (char-to-string (car components))
      (apply 'compose-chars components))))
1079 | ||
bd09f27b KH |
1080 | ;; Utility function for Phase 2.5 |
1081 | ;; Check whether given glyph is a Devanagari vertical modifier or not. | |
1082 | ;; If it is a vertical modifier, whether it should be 1-column shape or not | |
1083 | ;; depends on previous non-vertical modifier. | |
1084 | ; return nil if it is not vertical modifier. | |
(defun devanagari-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH occurs in the glyph list written below.
The value is whatever `string-match' returns: the index of the match
in that list, or nil if GLYPH is not found."
  ;; NOTE(review): GLYPH is used as the regexp and the glyph list is
  ;; the string being searched.  The enclosing brackets are therefore
  ;; ordinary characters of that string, and a GLYPH that is special
  ;; in regexp syntax is not matched literally.
  (string-match (char-to-string glyph)
                "[\e$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M\e(B]"))
bd09f27b KH |
1088 | |
(defun devanagari-non-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH occurs in the glyph list written below.
The value is whatever `string-match' returns: the index of the match
in that list, or nil if GLYPH is not found."
  ;; NOTE(review): as in `devanagari-vertical-modifier-p', GLYPH is
  ;; used as the regexp and the list is the string being searched.
  ;; The commented-out line is presumably an earlier, longer list.
  (string-match (char-to-string glyph)
; "[\e$(5!Z![!\!d!e!f!g\e(B]"))
                "[\e$(5![\e(B]"))
bd09f27b | 1093 | |
907c83c8 KH |
(defun devanagari-wide-to-narrow-char (char)
  "Return the narrow character corresponding to CHAR, if it exists.
The narrow form is looked up in `devanagari-1-column-char'.  If CHAR
has no narrow form, CHAR itself is returned."
  (or (cdr (assq char devanagari-1-column-char))
      char))
4ed46869 KH |
1098 | |
1099 | ;; | |
1100 | ;; Phase 2.5 Convert Appropriate Character to 1-column shape. | |
1101 | ;; | |
1102 | ;; This is temporary and should be removed out when Emacs supports | |
1103 | ;; variable width characters. | |
1104 | ;; | |
1105 | ;; This will convert the composing glyphs (2 column glyphs) | |
1106 | ;; to narrow (1 column) glyphs if they exist. | |
1107 | ;; | |
1108 | ;; devanagari-wide-to-narrow-old converts glyphs simply. | |
1109 | ;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs | |
1110 | ;; with 2 column base-glyph. | |
1111 | ;; | |
1112 | ;; Execution Examples | |
bd09f27b KH |
1113 | ;;(devanagari-wide-to-narrow '(?\e$(5!3\e(B (ml . ml) ?\e$(5!a\e(B)) |
1114 | ;;(devanagari-wide-to-narrow '(?\e$(5!F\e(B (ml . ml) ?\e$(5!a\e(B)) | |
1115 | ||
(defun devanagari-wide-to-narrow (src-list)
  "Convert the glyphs in SRC-LIST to their narrow forms where possible.
This calls `devanagari-wide-to-narrow-iter' on SRC-LIST with its
2-column flag initially set to t, and returns the resulting list."
  (let ((initial-2-col-flag t))
    (devanagari-wide-to-narrow-iter src-list initial-2-col-flag)))
1118 | ||
(defun devanagari-wide-to-narrow-iter (src-list 2-col-glyph)
  "Return a copy of SRC-LIST with glyphs narrowed where appropriate.
SRC-LIST mixes glyph codes (numbers) with other elements, which are
copied unchanged.  2-COL-GLYPH is the initial value of a flag that
says whether the most recent normal glyph was kept in its wide form.

- A non-vertical modifier is always replaced by its narrow form.
- A vertical modifier is kept as it is while the flag is non-nil,
  and replaced by its narrow form otherwise.
- Any other glyph is replaced by its narrow form if it has one in
  `devanagari-1-column-char', which clears the flag; otherwise it is
  kept and the flag is set."
  (let ((wide-base 2-col-glyph)
        (converted nil)
        glyph)
    (while src-list
      (setq glyph (car src-list)
            src-list (cdr src-list))
      (cond
       ;; Not a glyph code: copy it unchanged.
       ((not (numberp glyph))
        (setq converted (cons glyph converted)))
       ;; Narrowed regardless of the flag.
       ((devanagari-non-vertical-modifier-p glyph)
        (setq converted (cons (devanagari-wide-to-narrow-char glyph)
                              converted)))
       ;; Depends on the flag; the flag itself is left as it is.
       ((devanagari-vertical-modifier-p glyph)
        (setq converted (cons (if wide-base
                                  glyph
                                (devanagari-wide-to-narrow-char glyph))
                              converted)))
       ;; Normal glyph that has a narrow form.
       ((cdr (assq glyph devanagari-1-column-char))
        (setq converted (cons (devanagari-wide-to-narrow-char glyph)
                              converted)
              wide-base nil))
       ;; Normal glyph that stays wide.
       (t
        (setq converted (cons glyph converted)
              wide-base t))))
    (nreverse converted)))
1143 | ||
4ed46869 KH |
1144 | |
1145 | ;; | |
1146 | ;; Summary | |
1147 | ;; | |
1148 | ||
4ed46869 KH |
1149 | ;; |
1150 | ;; Decomposition of composite font. | |
1151 | ;; | |
1152 | ||
(defun devanagari-normalize-narrow-glyph (charlist)
  "Return a copy of CHARLIST with narrow glyphs replaced by wide ones.
An element that appears as the cdr of a pair in
`devanagari-1-column-char' is replaced by the car of that pair.
Every other element is copied unchanged."
  (mapcar (function
           (lambda (char)
             (or (car (rassoc char devanagari-1-column-char))
                 char)))
          charlist))
1158 | ||
;; Entries have the form (GLYPH PRIORITY).  This list is consulted by
;; `devanagari-reorder-glyphs-for-decomposition' when it sorts the
;; glyphs of a decomposed character.
(defvar devanagari-decomposition-rules
  '(
    (?\e$(5"p\e(B -10)
    )
  )
1164 | ||
37cdc7ad KH |
(defun devanagari-reorder-glyphs-for-decomposition (glyphlist)
  "Sort GLYPHLIST by decomposition priority and return the sorted list.
The priority of a glyph is the second element of its entry in
`devanagari-decomposition-rules'.  A glyph without an entry has
priority 0.  Glyphs are ordered by ascending priority.  GLYPHLIST is
sorted destructively by `sort'."
  (sort glyphlist
        (function
         (lambda (x y)
           ;; Compare the numeric priorities, not the rule entries
           ;; themselves: `<' signals an error when given a list.
           (< (or (car (cdr (assoc x devanagari-decomposition-rules))) 0)
              (or (car (cdr (assoc y devanagari-decomposition-rules))) 0))))))
1174 | ||
37cdc7ad KH |
1175 | (defun devanagari-decompose-char (glyph) |
1176 | "This function decomposes one Devanagari composite glyph to | |
1177 | basic Devanagari characters as a string." | |
1178 | (let ((glyphlist | |
1179 | (if (eq (car (split-char glyph)) 'composition) | |
1180 | (string-to-list (decompose-composite-char glyph)) | |
1181 | (list glyph)))) | |
4ed46869 | 1182 | (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist)) |
37cdc7ad KH |
1183 | (setq glyphlist (devanagari-reorder-glyphs-for-decomposition glyphlist)) |
1184 | (string-conversion-by-rule | |
1185 | (mapconcat 'char-to-string glyphlist "") 'glyph-to-char))) | |
4ed46869 KH |
1186 | |
;;;###autoload
(defun devanagari-decompose-string (str)
  "Decompose Devanagari glyph string STR to basic Devanagari character string.
Each character of STR is passed to `devanagari-decompose-char', and
the resulting strings are concatenated in order."
  (mapconcat 'devanagari-decompose-char str ""))
1197 | ||
;;;###autoload
(defun devanagari-decompose-region (from to)
  "Decompose the characters in the region between FROM and TO.
Each character matched by the regexp \".\" is replaced in the buffer
by the result of `devanagari-decompose-string' applied to it."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward "." nil t)
      ;; Record the match bounds first; the conversion may change the
      ;; match data.
      (let* ((char-start (match-beginning 0))
             (char-end (match-end 0))
             (replacement (devanagari-decompose-string
                           (buffer-substring char-start char-end))))
        (delete-region char-start char-end)
        (insert replacement)))))
1209 | ||
37cdc7ad KH |
1210 | ;;; |
1211 | ;;; Composition | |
1212 | ;;; | |
1213 | ||
;;;###autoload
(defun devanagari-compose-string (str &rest langs)
  "Return STR with its Devanagari glyph units composed.
STR is first decomposed with `devanagari-decompose-string'.  Every
part of the result that matches `devanagari-composite-glyph-unit' is
converted with `char-to-glyph-devanagari', to which LANGS is passed
along, and then composed with `devanagari-compose-to-one-glyph'.
Text between the matches is copied unchanged."
  (let ((remaining (devanagari-decompose-string str))
        (composed ""))
    (while (string-match devanagari-composite-glyph-unit remaining)
      (let ((unit-start (match-beginning 0))
            (unit-end (match-end 0)))
        (setq composed
              (concat composed
                      (substring remaining 0 unit-start)
                      (devanagari-compose-to-one-glyph
                       (apply 'char-to-glyph-devanagari
                              (substring remaining unit-start unit-end)
                              langs)))
              ;; Continue after the unit just handled.
              remaining (substring remaining unit-end))))
    (concat composed remaining)))
4ed46869 | 1231 | |
37cdc7ad KH |
;;;###autoload
(defun devanagari-compose-region (from to &rest langs)
  "Compose the Devanagari glyph units in the region between FROM and TO.
Every match of `devanagari-composite-glyph-unit' is converted with
`char-to-glyph-devanagari', to which LANGS is passed along, composed
with `devanagari-compose-to-one-glyph', and replaced in the buffer by
the result."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward devanagari-composite-glyph-unit nil t)
      ;; Record the match bounds first; the conversion may change the
      ;; match data.
      (let* ((unit-start (match-beginning 0))
             (unit-end (match-end 0))
             (composed (devanagari-compose-to-one-glyph
                        (apply 'char-to-glyph-devanagari
                               (buffer-substring unit-start unit-end)
                               langs))))
        (delete-region unit-start unit-end)
        (insert composed)))))
4ed46869 KH |
1247 | |
1248 | ;; For pre-write and post-read conversion | |
1249 | ||
;;;###autoload
(defun devanagari-compose-from-is13194-region (from to)
  "Compose IS 13194 characters in the region to Devanagari characters.
The region between FROM and TO is converted with
`indian-to-devanagari-region' and then composed with
`devanagari-compose-region'.  The value is the size of the region
after the conversion."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (indian-to-devanagari-region (point-min) (point-max))
      (devanagari-compose-region (point-min) (point-max))
      ;; The accessible part of the buffer is the converted text.
      (let ((converted-size (- (point-max) (point-min))))
        converted-size))))
4ed46869 | 1260 | |
cefa701a KH |
;;;###autoload
(defun in-is13194-devanagari-post-read-conversion (len)
  "Compose the LEN characters following point from IS 13194 to Devanagari.
The conversion is done by `devanagari-compose-from-is13194-region',
whose value is returned.  The modified flag of the buffer is set back
afterwards to the value it had before the conversion."
  (let* ((was-modified (buffer-modified-p))
         (start (point))
         (result (devanagari-compose-from-is13194-region start (+ start len))))
    (set-buffer-modified-p was-modified)
    result))
cefa701a | 1268 | |
4ed46869 KH |
;;;###autoload
(defun devanagari-decompose-to-is13194-region (from to)
  "Decompose Devanagari characters in the region to IS 13194 characters.
The region between FROM and TO is decomposed with
`devanagari-decompose-region' and then converted with
`devanagari-to-indian-region', whose value is returned."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: composite glyphs to basic Devanagari characters.
    (devanagari-decompose-region (point-min) (point-max))
    ;; Step 2: Devanagari characters to IS 13194.
    (let ((result (devanagari-to-indian-region (point-min) (point-max))))
      result)))
1277 | ||
cefa701a KH |
;;;###autoload
(defun in-is13194-devanagari-pre-write-conversion (from to)
  "Put the IS 13194 form of the text to be written in a work buffer.
If FROM is a string, that string is the text; otherwise the text is
the part of the current buffer between FROM and TO.  The text is
copied into the buffer \" *devanagari-work*\", which is erased first
and is left as the current buffer, and is converted there with
`devanagari-decompose-to-is13194-region'.  The value is always nil."
  (let ((source-buffer (current-buffer)))
    ;; Deliberately not restored: the work buffer stays current.
    (set-buffer (get-buffer-create " *devanagari-work*"))
    (erase-buffer)
    (if (stringp from)
        (insert from)
      (insert-buffer-substring source-buffer from to))
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))
37cdc7ad KH |
1290 | |
1291 | ;; For input/output of ITRANS | |
1292 | ||
;;;###autoload
(defun devanagari-encode-itrans-region (from to)
  "Encode the Devanagari text in the region between FROM and TO as ITRANS.
The region is first converted with
`devanagari-decompose-to-is13194-region' and then with
`indian-encode-itrans-region', whose value is returned."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: Devanagari glyphs to IS 13194.
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Step 2: IS 13194 to ITRANS.
    (let ((result (indian-encode-itrans-region (point-min) (point-max))))
      result)))
1300 | ||
;;;###autoload
(defun devanagari-decode-itrans-region (from to)
  "Decode the ITRANS text in the region between FROM and TO to Devanagari.
The region is first converted with `indian-decode-itrans-region' and
then with `devanagari-compose-from-is13194-region', whose value is
returned."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: ITRANS to IS 13194.
    (indian-decode-itrans-region (point-min) (point-max))
    ;; Step 2: IS 13194 to composed Devanagari glyphs.
    (let ((result (devanagari-compose-from-is13194-region
                   (point-min) (point-max))))
      result)))
1308 | ||
c47ec7c4 RS |
1309 | ;; Test comment. |
1310 | ||
4ed46869 | 1311 | ;; |
650e8505 | 1312 | (provide 'devan-util) |
4ed46869 | 1313 | |
;;; devan-util.el ends here