Commit | Line | Data |
---|---|---|
4ed46869 KH |
1 | ;;; devan-util.el --- Support for Devanagari Script Composition |
2 | ||
3 | ;; Copyright (C) 1996 Free Software Foundation, Inc. | |
4 | ||
5 | ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
6 | ||
7 | ;; Keywords: multilingual, Indian, Devanagari | |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published by | |
13 | ;; the Free Software Foundation; either version 2, or (at your option) | |
14 | ;; any later version. | |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
e803d6bd KH |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
23 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;; History: | |
29 | ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
bd09f27b | 30 | ;; 1997.1.20 fixed some bugs. |
37cdc7ad KH |
31 | ;; 1997.3.24 fixed some bugs. |
32 | ||
33 | ;; Future work :: | |
34 | ;; Decompose the input characters and process them on the character basis. | |
4ed46869 KH |
35 | |
36 | ;; Devanagari script composition rules and related programs. | |
37 | ||
38 | ;;; Code: | |
39 | ||
40 | ;;; | |
41 | ;;; Steps toward composition of Devanagari Characters. | |
42 | ;;; | |
43 | ||
37cdc7ad KH |
44 | ;;; Intersection Function will be used. |
45 | (require 'cl) | |
46 | ||
335a7ad7 KH |
47 | ;;;###autoload |
(defun setup-devanagari-environment ()
  "Set up the multilingual environment (MULE) for languages using Devanagari.
This simply hands a fixed argument list to `setup-8-bit-environment'."
  (interactive)
  (let ((environment-args (list "Devanagari" nil 'devanagari
                                "devanagari-itrans")))
    (apply 'setup-8-bit-environment environment-args)))
335a7ad7 | 53 | |
4ed46869 KH |
54 | ;;; Basic functions. |
55 | ||
56 | ;;;###autoload | |
(defun indian-to-devanagari (ch)
  "Return the Devanagari basic character for the IS 13194 character CH.
If CH does not belong to the `indian-is13194' charset it is returned
unchanged."
  (let* ((parts (split-char ch))
         (charset (car parts)))
    (cond ((eq charset 'indian-is13194)
           ;; Same position code, placed in row ?\x21 of `indian-2-column'.
           (make-char 'indian-2-column ?\x21 (car (cdr parts))))
          (t ch))))
63 | ||
64 | ;;;###autoload | |
(defun devanagari-to-indian (ch)
  "Return the IS 13194 character for the Devanagari basic character CH.
CH is converted only when it belongs to the `indian-2-column' charset
and its first position code is ?\\x21; the second position code then
becomes the code of the resulting `indian-is13194' character.  Any
other character is returned unchanged."
  ;; The previous `let*' also bound `charset' and `code-h', neither of
  ;; which was ever read; only the split character codes are needed.
  (let ((charcodes (split-char ch)))
    (if (and (eq (car charcodes) 'indian-2-column)
             (= (nth 1 charcodes) ?\x21))
        (make-char 'indian-is13194 (nth 2 charcodes))
      ch)))
74 | ||
75 | ;;;###autoload | |
(defun indian-to-devanagari-region (from to)
  "Convert IS 13194 characters between FROM and TO to Devanagari basic characters.
Interactively, FROM and TO are the bounds of the region.  Each character
matched by the regexp \".\" is replaced by the result of passing it to
`indian-to-devanagari'."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; A search limited to one character category is kept only as a
    ;; disabled alternative:
    ;;   (while (re-search-forward "\\cd" nil t)
    (while (re-search-forward "." nil t)
      (let ((converted (indian-to-devanagari (preceding-char))))
        ;; Replace the character just matched with its converted form.
        (delete-char -1)
        (insert converted)))))
87 | ||
88 | ;;;###autoload | |
(defun devanagari-to-indian-region (from to)
  "Convert Devanagari basic characters between FROM and TO to Indian characters.
Interactively, FROM and TO are the bounds of the region.  Each character
matched by the regexp \".\" is replaced by the result of passing it to
`devanagari-to-indian'."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; A search limited to the Devanagari character category is kept
    ;; only as a disabled alternative:
    ;;   (while (re-search-forward "\\cD" nil t)
    (while (re-search-forward "." nil t)
      (let ((converted (devanagari-to-indian (preceding-char))))
        ;; Replace the character just matched with its converted form.
        (delete-char -1)
        (insert converted)))))
100 | ||
101 | ;;;###autoload | |
(defun indian-to-devanagari-string (str)
  "Return a new string made by converting each character of Indian string STR.
Every character of STR is passed through `indian-to-devanagari' and the
results are joined in the original order.  STR itself is not modified;
an empty STR yields an empty string."
  ;; The unused local `pos' has been dropped, and the converted pieces
  ;; are collected in a list and joined once at the end instead of
  ;; re-copying the growing result with `concat' on every iteration.
  (let ((rest str)
        (pieces nil)
        char)
    (while (not (equal rest ""))
      (setq char (string-to-char rest))
      ;; Step over the leading character by the width `char-bytes'
      ;; reports for it.
      (setq rest (substring rest (char-bytes char)))
      (setq pieces (cons (char-to-string (indian-to-devanagari char))
                         pieces)))
    (mapconcat 'identity (nreverse pieces) "")))
110 | ||
111 | ;; Phase 0 - Determine whether the characters can be composed. | |
112 | ;; | |
113 | ;;; | |
114 | ;;; Regular expressions to split characters for composition. | |
115 | ;;; | |
116 | ;; | |
117 | ;; Indian script word contains one or more syllables. | |
118 | ;; In BNF, it can be expressed as follows: | |
119 | ;; | |
120 | ;; Word ::= {Syllable} [Cons-Syllable] | |
121 | ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable | |
122 | ;; Vowel-Syllable ::= V[D] | |
123 | ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D] | |
bd09f27b | 124 | ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons |
4ed46869 KH |
125 | ;; Pure-Cons ::= Full-Cons H |
126 | ;; Full-Cons ::= C [N] | |
127 | ;; | |
128 | ;; {} repeat, [] optional | |
129 | ;; | |
130 | ;; C - Consonant (\e$(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E\e(B | |
131 | ;; \e$(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X\e(B) | |
132 | ;; N - Nukta (\e$(5!i\e(B) | |
bd09f27b KH |
133 | ;; H - Halant(\e$(5!h\e(B) or Virama |
134 | ;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*\e(B) | |
135 | ;; ("\e$(5#&#'#*\e(B" can be obtained by IS13194 vowels with nukta.) | |
37cdc7ad KH |
136 | ;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu (\e$(5!!!"\e(B) |
137 | ;; (Visaraga (\e$(5!#\e(B) is excluded.) | |
bd09f27b KH |
138 | ;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M\e(B) |
139 | ;; ("\e$(5#K#L#M\e(B" can be obtained by IS13194 matras with nukta.) | |
4ed46869 | 140 | ;; |
37cdc7ad KH |
141 | ;; In Emacs, one syllable of Indian language is considered to be one |
142 | ;; composite glyph. If we expand the above expression for | |
143 | ;; cons-vowel-syllable, it would be: | |
4ed46869 | 144 | ;; |
37cdc7ad | 145 | ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D] |
4ed46869 | 146 | ;; |
37cdc7ad | 147 | ;; Therefore, in worst case, the one syllable may contain |
4ed46869 KH |
148 | ;; following characters. |
149 | ;; | |
bd09f27b KH |
150 | ;; C N H C N H C N H C N H C N M D |
151 | ;; | |
37cdc7ad KH |
152 | ;; The example is a sanskrit word "kArtsnya", where five consecutive |
153 | ;; consonants appear. | |
4ed46869 | 154 | ;; |
37cdc7ad KH |
155 | ;; On the other hand, consonant-syllable, which appears at the end of |
156 | ;; the word, would have the following expression: | |
4ed46869 | 157 | ;; |
bd09f27b | 158 | ;; [C [N] H] [C [N] H] [C [N] H] C [N] H |
4ed46869 KH |
159 | ;; |
160 | ;; This is acceptable BEFORE proper consonant-syllable is input. The | |
161 | ;; string which doesn't match with the above expression is invalid and | |
162 | ;; thus must be fixed. | |
163 | ;; | |
164 | ;; Note: | |
165 | ;; A third case can be considered: a syllable that is already complete | |
166 | ;; and to which no further code can be appended. | |
167 | ;; | |
bd09f27b | 168 | ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D |
4ed46869 KH |
169 | ;; |
170 | ;; However, to make editing possible even in this condition, we will | |
171 | ;; not consider about this case. | |
37cdc7ad KH |
172 | ;; |
173 | ;; Note: | |
174 | ;; Currently, it seems that only the following consonants can have | |
175 | ;; the Nukta sign attached. | |
176 | ;; (\e$(5!3!4!5!:!?!@!I\e(B) | |
177 | ;; Therefore, [\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B? can be re-written as | |
178 | ;; \\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X\e(B] | |
179 | ||
;; Full-Cons ::= C [N] in the grammar described in the comments above.
;; The first alternative is one consonant from the nukta-taking set
;; followed by the nukta sign; the second alternative is any single
;; character from the bracket expression.  The whole regexp is wrapped
;; in one group.
180 | (defconst devanagari-full-cons | |
181 | "\\(\\([\e$(5!3!4!5!:!?!@!I\e(B]\e$(5!i\e(B\\)\\|[\e$(5!3\e(B-\e$(5!X$.$E"%\e(B]\\)" | |
182 | "Devanagari full consonant") | |
183 | ||
;; Pure-Cons ::= Full-Cons H in the grammar described in the comments
;; above: `devanagari-full-cons' followed by the halant sign.  The result
;; is wrapped in a group so that a postfix operator appended by a caller
;; (such as "?") applies to the whole unit.
184 | (defconst devanagari-pure-cons | |
185 | (concat "\\(" devanagari-full-cons "\e$(5!h\e(B\\)") | |
186 | "Devanagari pure consonant") | |
187 | ||
;; M (Matra) in the grammar described in the comments above.  The first
;; alternative is one of three matras followed by the nukta sign; the
;; second is any single character from the bracket expression, which
;; also lists the three codes named in the docstring.
188 | (defconst devanagari-matra | |
189 | "\\(\\([\e$(5!_![!\\e(B]\e$(5!i\e(B\\)\\|[\e$(5!Z\e(B-\e$(5!g#K#L#M\e(B]\\)" | |
190 | "Devanagari Matra Signs. '\e$(5#K#L#M\e(B' can also be created from the combination | |
191 | of '\e$(5!_![!\\e(B' and nukta sign.") | |
192 | ||
;; V (Vowel) in the grammar described in the comments above.  The first
;; alternative is one of three vowels followed by the nukta sign; the
;; second is any single character from the bracket expression, which
;; also lists the three codes named in the docstring.
193 | (defconst devanagari-vowel | |
194 | "\\(\\([\e$(5!*!&!'\e(B]\e$(5!i\e(B\\)\\|[\e$(5!$\e(B-\e$(5!2#&#'#*\e(B]\\)" | |
195 | "Devanagari Vowels. '\e$(5#&#'#*\e(B' can also be created from the combination | |
196 | of '\e$(5!*!&!'\e(B' and nukta sign.") | |
197 | ||
;; Vowel-Syllable ::= V [D] in the grammar described in the comments
;; above: `devanagari-vowel' followed by an optional vowel modifier
;; taken from the two-character bracket expression.
198 | (defconst devanagari-vowel-syllable | |
199 | (concat devanagari-vowel "[\e$(5!!!"\e(B]?") | |
200 | "Devanagari vowel syllable.") | |
201 | ||
;; Cons-Syllable in the grammar described in the comments above: up to
;; three optional `devanagari-pure-cons' units followed by one mandatory
;; unit.  The trailing "$" anchors the final unit to the end of the text
;; being matched.
202 | (defconst devanagari-cons-syllable | |
203 | (concat devanagari-pure-cons "?" devanagari-pure-cons "?" | |
204 | devanagari-pure-cons "?" devanagari-pure-cons "$") | |
205 | "Devanagari consonant syllable") | |
206 | ||
;; Cons-Vowel-Syllable in the grammar described in the comments above.
;; An optional group of one to four `devanagari-pure-cons' units (the
;; first three individually optional) is followed by a mandatory
;; `devanagari-full-cons', an optional `devanagari-matra' and an optional
;; vowel modifier from the two-character bracket expression.
207 | (defconst devanagari-cons-vowel-syllable | |
208 | (concat "\\(" | |
209 | devanagari-pure-cons "?" devanagari-pure-cons "?" | |
210 | devanagari-pure-cons "?" devanagari-pure-cons "\\)?" | |
211 | devanagari-full-cons devanagari-matra "?[\e$(5!!!"\e(B]?") | |
212 | "Devanagari consonant vowel syllable.") | |
4ed46869 KH |
213 | |
214 | ;; | |
215 | ;; Also, digits and virams should be processed separately from syllables. | |
216 | ;; | |
217 | ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and | |
218 | ;; OM is obtained by Nukta after Chandrabindu | |
219 | ;; | |
37cdc7ad KH |
220 | |
;; Bracket expression matching exactly one character: either a member of
;; the code range given first, or one of the two extra codes listed after
;; it.  Going by the constant's name these are the digits, the viram and
;; the visarga -- NOTE(review): confirm the code assignments against the
;; charset table.  This constant has no docstring.
221 | (defconst devanagari-digit-viram-visarga | |
222 | "[\e$(5!q\e(B-\e$(5!z!j!#\e(B]") | |
;; Two alternatives: one of two sign characters followed by the nukta
;; sign, or one of two single codes.  The rule table later in this file
;; maps the same two sign-plus-nukta sequences onto those two codes.
;; The alternation is not enclosed in an outer group here, so callers
;; must add one before concatenating further regexp text.  This constant
;; has no docstring.
223 | (defconst devanagari-other-sign | |
bd09f27b | 224 | "\\([\e$(5!!!j\e(B]\e$(5!i\e(B\\)\\|\\([\e$(5#!#J\e(B]\\)") |
4ed46869 | 225 | |
37cdc7ad KH |
;; Alternation of the five unit regexps, each enclosed in its own group,
;; listed in this order: consonant syllable, vowel syllable,
;; digit/viram/visarga, consonant-vowel syllable, other sign.
226 | (defconst devanagari-composite-glyph-unit |
227 | (concat "\\(" devanagari-cons-syllable | |
228 | "\\)\\|\\(" devanagari-vowel-syllable | |
229 | "\\)\\|\\(" devanagari-digit-viram-visarga | |
230 | "\\)\\|\\(" devanagari-cons-vowel-syllable | |
231 | "\\)\\|\\(" devanagari-other-sign "\\)") | |
4ed46869 KH |
232 | "Regexp matching to Devanagari string to be composed form one glyph.") |
233 | ||
234 | ;;(put-charset-property charset-devanagari-1-column | |
235 | ;; 'char-to-glyph 'devanagari-compose-string) | |
236 | ;;(put-charset-property charset-devanagari-2-column | |
237 | ;; 'char-to-glyph 'devanagari-compose-string) | |
238 | ||
239 | ;; Sample | |
240 | ;; | |
37cdc7ad KH |
241 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!X![\e(B") => 0 |
242 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!F!h!D!\\e(B") => 0 | |
243 | ;;(string-match devanagari-cons-vowel-syllable-examine "\e$(5!X![!F!h!D!\\e(B") => 0 | |
4ed46869 KH |
244 | |
245 | ;; | |
246 | ;; Steps toward the composition | |
37cdc7ad | 247 | ;; Converting Character Codes to Composite Glyph. |
4ed46869 KH |
248 | ;; |
249 | ;; Example : \e$(5!X![\e(B/\e$(5!F!h!D!\\e(B | |
250 | ;; | |
251 | ;; First, convert Characters to appropriate glyphs. | |
252 | ;; | |
253 | ;; => \e$(5!X![\e(B/\e$(5"F!D!\\e(B | |
254 | ;; | |
255 | ;; Then, determine the base glyph, apply-orders and apply-rules. | |
256 | ;; | |
257 | ;; => \e$(5!X\e(B (ml.mr) \e$(5![\e(B / \e$(5!D\e(B (ml.mr) \e$(5"F\e(B (mr ml) \e$(5!\\e(B | |
258 | ;; | |
259 | ;; Finally, convert 2-column glyphs to 1-column glyph | |
260 | ;; if such a glyph exist. | |
261 | ;; | |
262 | ;; => \e$(6![\e(B (ml.mr) \e$(6!X\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B | |
263 | ;; | |
264 | ;; Compose the glyph. | |
265 | ;; | |
266 | ;; => \e2\e$(6!X@![\e1\e(B/\e2\e$(6!D@"FP!\\e1\e(B | |
267 | ;; => \e2\e$(6!X@![\e1\e2!D@"FP!\\e1\e(B | |
268 | ;; | |
269 | ||
270 | ;; | |
271 | ;; Phase 1: Converting Character Code to Glyph Code. | |
272 | ;; | |
273 | ;; | |
274 | ;; IMPORTANT: | |
275 | ;; There may be many rules which you may want to suppress. | |
276 | ;; In that case, please comment out those rules. | |
277 | ;; | |
278 | ;; RULES WILL BE EVALUATED FROM FIRST TO LAST. | |
279 | ;; PUT MORE SPECIFIC RULES FIRST. | |
280 | ;; | |
281 | ;; TO DO: | |
282 | ;; Prepare multiple specific list of rules for each languages | |
283 | ;; which adopts Devanagari script. | |
284 | ;; | |
285 | ||
286 | ||
287 | (defconst devanagari-char-to-glyph-rules | |
288 | '( | |
4ed46869 KH |
289 | |
290 | ;; `r' at the top of syllable and followed by other consonants. | |
37cdc7ad KH |
291 | ;; ("[^\e$(5!h\e(B]\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p\e(B") |
292 | ("^\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5"p\e(B") | |
4ed46869 KH |
293 | |
294 | ;; Ligature Rules | |
37cdc7ad KH |
295 | ("\\(\e$(5!3!h!B!h!O!h!M\e(B\\)" "\e$(5$!\e(B" sanskrit) |
296 | ("\\(\e$(5!3!h!B!h!T\e(B\\)" "\e$(5$"\e(B" sanskrit) | |
297 | ("\\(\e$(5!3!h!B!h!M\e(B\\)" "\e$(5$#\e(B" sanskrit) | |
298 | ("\\(\e$(5!3!h!F!h!M\e(B\\)" "\e$(5$$\e(B") | |
299 | ("\\(\e$(5!3!h!O!h!M\e(B\\)" "\e$(5$%\e(B") | |
300 | ("\\(\e$(5!3!h!O\e(B\\)" "\e$(5"#\e(B") ; Post "r" | |
301 | ("\\(\e$(5!3!h!T!h!M\e(B\\)" "\e$(5$&\e(B" sanskrit) | |
302 | ("\\(\e$(5!3!h\e(B\\)\e$(5!3!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"3\e(B") ; Special Half Form | |
303 | ("\\(\e$(5!3!h!3\e(B\\)" "\e$(5$'\e(B") | |
304 | ("\\(\e$(5!3!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"3\e(B") ; Special Rules for "k-tr" | |
305 | ("\\(\e$(5!3!h!B\e(B\\)" "\e$(5$(\e(B") | |
306 | ("\\(\e$(5!3!h!F\e(B\\)" "\e$(5$)\e(B") | |
307 | ("\\(\e$(5!3!h!L\e(B\\)" "\e$(5$*\e(B") | |
308 | ("\\(\e$(5!3!h!M\e(B\\)" "\e$(5$+\e(B") | |
309 | ("\\(\e$(5!3!h!Q\e(B\\)" "\e$(5$,\e(B") | |
310 | ("\\(\e$(5!3!h!T\e(B\\)" "\e$(5$-\e(B") | |
311 | ("\\(\e$(5!3!h!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"l\e(B") ; Half Form | |
312 | ("\\(\e$(5$.!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"l\e(B") ; Half Form | |
313 | ("\\(\e$(5!3!h!V\e(B\\)" "\e$(5$.\e(B") | |
314 | ("\\(\e$(5!3!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"3\e(B") ; Half Form | |
315 | ("\\(\e$(5!3!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"s\e(B") ; Nukta Half Form | |
316 | ("\\(\e$(5!3!i\e(B\\)" "\e$(5#3\e(B") ; Nukta | |
317 | ("\\(\e$(5!4!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"4\e(B") ; Half Form | |
318 | ("\\(\e$(5!4!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"t\e(B") ; Nukta Half Form | |
319 | ("\\(\e$(5!4!i\e(B\\)" "\e$(5#4\e(B") ; Nukta | |
320 | ("\\(\e$(5!5!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"`\e(B") ; Half Form | |
321 | ("\\(\e$(5!5!h!O\e(B\\)" "\e$(5"$\e(B") ; Post "r" | |
322 | ("\\(\e$(5!5!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"5\e(B") ; Half Form | |
323 | ("\\(\e$(5!5!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"u\e(B") ; Nukta Half Form | |
324 | ("\\(\e$(5!5!i\e(B\\)" "\e$(5#5\e(B") ; Nukta | |
325 | ("\\(\e$(5!6!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"a\e(B") ; Half Form | |
326 | ("\\(\e$(5!6!h!F\e(B\\)" "\e$(5$/\e(B") | |
327 | ; Slot | |
328 | ("\\(\e$(5!6!h!O\e(B\\)" "\e$(5!6"q\e(B") ; Post "r" | |
329 | ("\\(\e$(5!6!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"6\e(B") ; Half Form | |
330 | ("\\(\e$(5!7!h!3!h!B!h!M\e(B\\)" "\e$(5$0\e(B" sanskrit) | |
331 | ("\\(\e$(5!7!h!3!h!V!h!T\e(B\\)" "\e$(5$1\e(B" sanskrit) | |
332 | ("\\(\e$(5!7!h!3!h!B\e(B\\)" "\e$(5$2\e(B" sanskrit) | |
333 | ("\\(\e$(5!7!h!3!h!V\e(B\\)" "\e$(5$3\e(B" sanskrit) | |
334 | ("\\(\e$(5!7!h!3!h!O\e(B\\)" "\e$(5$9"q\e(B") ; Special Rule. May be precomposed font needed. | |
335 | ("\\(\e$(5!7!h!6!h!O\e(B\\)" "\e$(5$4\e(B" sanskrit) | |
336 | ("\\(\e$(5!7!h!3!h!M\e(B\\)" "\e$(5$5\e(B" sanskrit) | |
337 | ("\\(\e$(5!7!h!4!h!M\e(B\\)" "\e$(5$6\e(B" sanskrit) | |
338 | ("\\(\e$(5!7!h!5!h!M\e(B\\)" "\e$(5$7\e(B" sanskrit) | |
339 | ("\\(\e$(5!7!h!6!h!M\e(B\\)" "\e$(5$8\e(B" sanskrit) | |
340 | ("\\(\e$(5!7!h!3\e(B\\)" "\e$(5$9\e(B") | |
341 | ("\\(\e$(5!7!h!4\e(B\\)" "\e$(5$:\e(B") | |
342 | ("\\(\e$(5!7!h!5!h!O\e(B\\)" "\e$(5$;"q\e(B") ; Special Rule. May be precomposed font needed. | |
343 | ("\\(\e$(5!7!h!5\e(B\\)" "\e$(5$;\e(B") | |
344 | ("\\(\e$(5!7!h!6\e(B\\)" "\e$(5$<\e(B") | |
345 | ("\\(\e$(5!7!h!7\e(B\\)" "\e$(5$=\e(B") | |
346 | ("\\(\e$(5!7!h!F\e(B\\)" "\e$(5$>\e(B") | |
347 | ("\\(\e$(5!7!h!L\e(B\\)" "\e$(5$?\e(B") | |
348 | ("\\(\e$(5!7!h!M\e(B\\)" "\e$(5$@\e(B") | |
349 | ("\\(\e$(5!8!h\e(B\\)[\e$(5!8!<\e(B]\e$(5!h\e(B" "\e$(5"8\e(B") ; Half Form | |
350 | ("\\(\e$(5!8!h!8\e(B\\)" "\e$(5$A\e(B") | |
351 | ("\\(\e$(5!8!h!<\e(B\\)" "\e$(5$B\e(B") | |
352 | ("\\(\e$(5!8!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"8"q\e(B") ; Half Form Post "r" | |
353 | ("\\(\e$(5!8!h!O\e(B\\)" "\e$(5!8"q\e(B") ; Post "r" | |
354 | ("\\(\e$(5!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"8\e(B") ; Half Form | |
355 | ("\\(\e$(5!9!h!M\e(B\\)" "\e$(5$C\e(B") | |
356 | ("\\(\e$(5!:!h!O\e(B\\)" "\e$(5$D\e(B") | |
357 | ("\\(\e$(5!:!h!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"m\e(B") ; Half Form | |
358 | ("\\(\e$(5!:!h!<\e(B\\)" "\e$(5$E\e(B") | |
359 | ("\\(\e$(5!:!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5":\e(B") ; Half Form | |
360 | ("\\(\e$(5!:!i!h!O\e(B\\)" "\e$(5"!\e(B") ; Nukta Post "r" | |
361 | ("\\(\e$(5!:!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"z\e(B") ; Nukta Half Form | |
362 | ("\\(\e$(5!:!i\e(B\\)" "\e$(5#:\e(B") ; Nukta | |
363 | ("\\(\e$(5!;!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5";\e(B") ; Half Form | |
364 | ("\\(\e$(5!<!h\e(B\\)\e$(5!8!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Special Half Form | |
365 | ("\\(\e$(5!<!h!8\e(B\\)" "\e$(5$F\e(B") | |
366 | ("\\(\e$(5!<!h\e(B\\)\e$(5!:!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Special Half Form | |
367 | ("\\(\e$(5!<!h!:\e(B\\)" "\e$(5$G\e(B") | |
368 | ("\\(\e$(5!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"<\e(B") ; Half Form | |
369 | ("\\(\e$(5!=!h!3\e(B\\)" "\e$(5$H\e(B") | |
370 | ("\\(\e$(5!=!h!=\e(B\\)" "\e$(5$I\e(B") | |
371 | ("\\(\e$(5!=!h!>\e(B\\)" "\e$(5$J\e(B") | |
372 | ("\\(\e$(5!=!h!M\e(B\\)" "\e$(5$K\e(B") | |
373 | ("\\(\e$(5!>!h!M\e(B\\)" "\e$(5$L\e(B") | |
374 | ("\\(\e$(5!?!h!5!h!M\e(B\\)" "\e$(5$M\e(B" sanskrit) | |
375 | ("\\(\e$(5!?!h!6!h!O\e(B\\)" "\e$(5$N\e(B" sanskrit) | |
376 | ("\\(\e$(5!?!h!O!h!M\e(B\\)" "\e$(5$O\e(B") | |
377 | ("\\(\e$(5!?!h!5\e(B\\)" "\e$(5$P\e(B") | |
378 | ("\\(\e$(5!?!h!6\e(B\\)" "\e$(5$Q\e(B") | |
379 | ("\\(\e$(5!?!h!?\e(B\\)" "\e$(5$R\e(B") | |
380 | ("\\(\e$(5!?!h!L\e(B\\)" "\e$(5$S\e(B") | |
381 | ("\\(\e$(5!?!h!M\e(B\\)" "\e$(5$T\e(B") | |
382 | ("\\(\e$(5!?!i\e(B\\)" "\e$(5#?\e(B") ; Nukta | |
383 | ("\\(\e$(5!@!h!M\e(B\\)" "\e$(5$`\e(B") | |
384 | ("\\(\e$(5!@!i\e(B\\)" "\e$(5#@\e(B") ; Nukta | |
385 | ("\\(\e$(5!A!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"A\e(B") ; Half Form | |
386 | ("\\(\e$(5!B!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"B\e(B") ; Special Rule for "t-tr" | |
387 | ("\\(\e$(5!B!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"c\e(B") ; Half Form | |
388 | ("\\(\e$(5!B!h!B\e(B\\)" "\e$(5$a\e(B") | |
389 | ("\\(\e$(5!B!h!F\e(B\\)" "\e$(5$b\e(B") | |
390 | ("\\(\e$(5!B!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"d\e(B") ; Half Form Post "r" | |
391 | ("\\(\e$(5!B!h!O\e(B\\)" "\e$(5"%\e(B") ; Post "r" | |
392 | ("\\(\e$(5!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"B\e(B") ; Half Form | |
393 | ("\\(\e$(5!C!h!O\e(B\\)" "\e$(5!C"q\e(B") ; Post "r" | |
394 | ("\\(\e$(5!C!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"C\e(B") ; Half Form | |
395 | ("\\(\e$(5!D!h!D!h!M\e(B\\)" "\e$(5$c\e(B") | |
396 | ("\\(\e$(5!D!h!E!h!M\e(B\\)" "\e$(5$d\e(B") | |
397 | ("\\(\e$(5!D!h!K!h!M\e(B\\)" "\e$(5$e\e(B") | |
398 | ("\\(\e$(5!D!h!K!h!O\e(B\\)" "\e$(5$r"r\e(B") ; Special Case for "dbhr" ; *** | |
399 | ("\\(\e$(5!D!h!O!h!M\e(B\\)" "\e$(5$f\e(B") | |
400 | ("\\(\e$(5!D!h!T!h!M\e(B\\)" "\e$(5$g\e(B") | |
401 | ("\\(\e$(5!D!h!5!h!O\e(B\\)" "\e$(5$h\e(B") | |
402 | ("\\(\e$(5!D!h!6!h!O\e(B\\)" "\e$(5$i\e(B") | |
403 | ("\\(\e$(5!D!h!D!h!T\e(B\\)" "\e$(5$j\e(B") | |
404 | ("\\(\e$(5!D!h!E!h!T\e(B\\)" "\e$(5$k\e(B") | |
405 | ("\\(\e$(5!D!h\e(B\\)\e$(5!E!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5!D!h\e(B") ; Special Half Form (for ddhra) | |
406 | ("\\(\e$(5!D!h!5\e(B\\)" "\e$(5$l\e(B") | |
407 | ("\\(\e$(5!D!h!6\e(B\\)" "\e$(5$m\e(B") | |
408 | ("\\(\e$(5!D!h!D\e(B\\)" "\e$(5$n\e(B") | |
409 | ("\\(\e$(5!D!h!E\e(B\\)" "\e$(5$o\e(B") | |
410 | ("\\(\e$(5!D!h!F\e(B\\)" "\e$(5$p\e(B") | |
411 | ("\\(\e$(5!D!h\e(B\\)\e$(5!J!h\e(B" "\e$(5!D!h\e(B") ; Suppressing "db-" | |
412 | ("\\(\e$(5!D!h!J\e(B\\)" "\e$(5$q\e(B") | |
413 | ("\\(\e$(5!D!h!K\e(B\\)" "\e$(5$r\e(B") | |
414 | ("\\(\e$(5!D!h!L\e(B\\)" "\e$(5$s\e(B") | |
415 | ("\\(\e$(5!D!h!M\e(B\\)" "\e$(5$t\e(B") | |
416 | ("\\(\e$(5!D!h!T\e(B\\)" "\e$(5$u\e(B") | |
417 | ("\\(\e$(5!E!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"e\e(B") ; Half Form | |
418 | ("\\(\e$(5!E!h!F\e(B\\)" "\e$(5$v\e(B") | |
419 | ("\\(\e$(5!E!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"f\e(B") ; Half Form Post "r" | |
420 | ("\\(\e$(5!E!h!O\e(B\\)" "\e$(5!E"q\e(B") ; Post "r" | |
421 | ("\\(\e$(5!E!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"E\e(B") ; Half Form | |
422 | ("\\(\e$(5!F!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"k\e(B") ; Half Form | |
423 | ("\\(\e$(5!F!h!F\e(B\\)" "\e$(5$w\e(B") | |
424 | ("\\(\e$(5!F!h!O\e(B\\)" "\e$(5!F"q\e(B") | |
425 | ("\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"F\e(B") ; Half Form | |
426 | ("\\(\e$(5!G!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"G\e(B") ; Nukta Half Form | |
427 | ("\\(\e$(5!H!h\e(B\\)\e$(5!B!h!O\e(B" "\e$(5"H\e(B") ; Special Rule for "p-tr" | |
428 | ("\\(\e$(5!H!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"g\e(B") ; Half Form | |
429 | ("\\(\e$(5!H!h!B\e(B\\)" "\e$(5$x\e(B") | |
430 | ("\\(\e$(5!H!h!F\e(B\\)" "\e$(5$y\e(B") | |
431 | ("\\(\e$(5!H!h!Q\e(B\\)" "\e$(5$z\e(B") | |
432 | ("\\(\e$(5!H!h!O\e(B\\)" "\e$(5"&\e(B") ; Post "r" | |
433 | ("\\(\e$(5!H!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"H\e(B") ; Half Form | |
434 | ("\\(\e$(5!I!h!O\e(B\\)" "\e$(5"'\e(B") ; Post "r" | |
435 | ("\\(\e$(5!I!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"I\e(B") ; Half Form | |
436 | ("\\(\e$(5!I!i!h!O\e(B\\)" "\e$(5""\e(B") ; Nukta Post "r" | |
437 | ("\\(\e$(5!I!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"y\e(B") ; Nukta Half Form | |
438 | ("\\(\e$(5!I!i\e(B\\)" "\e$(5#I\e(B") ; Nukta | |
439 | ("\\(\e$(5!J!h\e(B\\)\e$(5!F!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
440 | ("\\(\e$(5!J!h!F\e(B\\)" "\e$(5${\e(B") | |
441 | ("\\(\e$(5!J!h\e(B\\)\e$(5!J!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
442 | ("\\(\e$(5!J!h!J\e(B\\)" "\e$(5$|\e(B") | |
443 | ("\\(\e$(5!J!h\e(B\\)\e$(5!T!h\e(B[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Special Half Form | |
444 | ("\\(\e$(5!J!h!T\e(B\\)" "\e$(5$}\e(B") | |
445 | ("\\(\e$(5!J!h!O\e(B\\)" "\e$(5!J"q\e(B") ; Post "r" | |
446 | ("\\(\e$(5!J!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"J\e(B") ; Half Form | |
447 | ("\\(\e$(5!K!h!F\e(B\\)" "\e$(5$~\e(B") | |
448 | ("\\(\e$(5!K!h!O\e(B\\)" "\e$(5!K"q\e(B") ; Post "r" | |
449 | ("\\(\e$(5!K!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"K\e(B") ; Half Form | |
450 | ("\\(\e$(5!L!h!F\e(B\\)" "\e$(5#P\e(B") | |
451 | ("\\(\e$(5!L!h!Q\e(B\\)" "\e$(5#Q\e(B") | |
452 | ("\\(\e$(5!L!h!O\e(B\\)" "\e$(5!L"q\e(B") ; Post "r" | |
453 | ("\\(\e$(5!L!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"L\e(B") ; Half Form | |
454 | ("\\(\e$(5!M!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"M\e(B") ; Half Form | |
455 | ("\\(\e$(5!N!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"N\e(B") ; Half Form | |
456 | ;; special form for "ru". | |
457 | ("\\(\e$(5!O!]\e(B\\)" "\e$(5",\e(B") | |
458 | ("\\(\e$(5!O!^\e(B\\)" "\e$(5"-\e(B") | |
459 | ("\\(\e$(5!P!]\e(B\\)" "\e$(5".\e(B") | |
460 | ("\\(\e$(5!P!^\e(B\\)" "\e$(5"/\e(B") | |
4ed46869 | 461 | ;; |
37cdc7ad KH |
462 | ("\\(\e$(5!Q!h!Q\e(B\\)" "\e$(5#`\e(B" sanskrit) |
463 | ("\\(\e$(5!Q!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"Q\e(B") ; Half Form | |
464 | ("\\(\e$(5!R!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"R\e(B") ; Half Form | |
465 | ("\\(\e$(5!S!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"S\e(B") ; Half Form | |
466 | ("\\(\e$(5!T!h!F\e(B\\)" "\e$(5#a\e(B") | |
467 | ("\\(\e$(5!T!h!T\e(B\\)" "\e$(5#b\e(B") | |
468 | ("\\(\e$(5!T!h!O\e(B\\)" "\e$(5!T"q\e(B") ; Post "r" | |
469 | ("\\(\e$(5!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"T\e(B") ; Half Form | |
470 | ("\\(\e$(5!U!h!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"h\e(B") ; Half Form | |
471 | ("\\(\e$(5!U!h!8\e(B\\)" "\e$(5#c\e(B") | |
472 | ("\\(\e$(5!U!h!F\e(B\\)" "\e$(5#d\e(B") | |
473 | ("\\(\e$(5!U!h!J\e(B\\)" "\e$(5#e\e(B") | |
474 | ("\\(\e$(5!U!h!Q\e(B\\)" "\e$(5#f\e(B") | |
475 | ("\\(\e$(5!U!h\e(B\\)\e$(5!T!h!O\e(B" "\e$(5"U\e(B") ; Special Half Form | |
476 | ("\\(\e$(5!U!h!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"j\e(B") ; Half Form | |
477 | ; ("\\(\e$(5!U!h!T\e(B\\)" "\e$(5#g\e(B") | |
478 | ("\\(\e$(5!U!h!O!h!T\e(B\\)" "\e$(5#g\e(B") | |
479 | ("\\(\e$(5!U!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"i\e(B") ; Half Form | |
480 | ("\\(\e$(5!U!h!O\e(B\\)" "\e$(5")\e(B") ; Post "r" | |
481 | ("\\(\e$(5!U!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"U\e(B") ; Half Form | |
482 | ("\\(\e$(5!V!h!=!h!O!h!M\e(B\\)" "\e$(5#h\e(B") | |
483 | ("\\(\e$(5!V!h!=!h!M\e(B\\)" "\e$(5#i\e(B") | |
484 | ("\\(\e$(5!V!h!=!h!T\e(B\\)" "\e$(5#j\e(B") | |
485 | ("\\(\e$(5!V!h!=\e(B\\)" "\e$(5#k\e(B") | |
486 | ("\\(\e$(5!V!h!>\e(B\\)" "\e$(5#l\e(B") | |
487 | ("\\(\e$(5!V!h!O\e(B\\)" "\e$(5!V"q\e(B") ; Post "r" | |
488 | ("\\(\e$(5!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"V\e(B") ; Half Form | |
489 | ("\\(\e$(5!W!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"W"F\e(B") ; Special Half Form | |
490 | ("\\(\e$(5!W!h!F\e(B\\)" "\e$(5#m\e(B") | |
491 | ("\\(\e$(5!W!h!O\e(B\\)" "\e$(5#n\e(B") | |
492 | ("\\(\e$(5!W!h\e(B\\)[\e$(5!3\e(B-\e$(5!N!P\e(B-\e$(5!X\e(B]" "\e$(5"W\e(B") ; Half Form | |
493 | ("\\(\e$(5!X!h!A\e(B\\)" "\e$(5#p\e(B") | |
494 | ("\\(\e$(5!X!h!F\e(B\\)" "\e$(5#q\e(B") | |
495 | ("\\(\e$(5!X!h!L\e(B\\)" "\e$(5#r\e(B") | |
496 | ("\\(\e$(5!X!h!M\e(B\\)" "\e$(5#s\e(B") | |
497 | ("\\(\e$(5!X!h!O\e(B\\)" "\e$(5#t\e(B") | |
498 | ("\\(\e$(5!X!h!Q\e(B\\)" "\e$(5#u\e(B") | |
499 | ("\\(\e$(5!X!h!T\e(B\\)" "\e$(5#v\e(B") | |
500 | ;; Special Ligature Rules | |
501 | ("\\(\e$(5!X!_\e(B\\)" "\e$(5#R\e(B") | |
4ed46869 KH |
502 | |
503 | ;; For consonants other than listed above, glyph-composition will | |
504 | ;; be applied. If the consonant which is preceding "\e$(5!O\e(B" does not | |
505 | ;; have the vertical line (such as "\e$(5!?\e(B"), "\e$(5"r\e(B" is put beneath the | |
506 | ;; consonant. | |
507 | ;; | |
37cdc7ad KH |
508 | ("[\e$(5!7!9!=!>!?!@!D!O!P!R!S!X\e(B]\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") |
509 | ("[\e$(5!6!8!C!E!F!H!J!K!L!M!T!V\e(B]\\(\e$(5!h!O\e(B\\)" "\e$(5"q\e(B") | |
510 | ("\e$(5!?!i\e(B\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") | |
511 | ("\e$(5!@!i\e(B\\(\e$(5!h!O\e(B\\)" "\e$(5"r\e(B") | |
512 | ||
513 | ;; Nukta with Non-Consonants | |
514 | ("\\(\e$(5!!!i\e(B\\)" "\e$(5#!\e(B") | |
515 | ("\\(\e$(5!&!i\e(B\\)" "\e$(5#&\e(B") | |
516 | ("\\(\e$(5!'!i\e(B\\)" "\e$(5#'\e(B") | |
517 | ("\\(\e$(5!*!i\e(B\\)" "\e$(5#*\e(B") | |
518 | ("\\(\e$(5![!i\e(B\\)" "\e$(5#L\e(B") | |
519 | ("\\(\e$(5!\!i\e(B\\)" "\e$(5#M\e(B") | |
520 | ("\\(\e$(5!_!i\e(B\\)" "\e$(5#K\e(B") | |
521 | ("\\(\e$(5!j!i\e(B\\)" "\e$(5#J\e(B") | |
522 | ||
523 | ;; Special rule for "r + some vowels" | |
524 | ("\\(\e$(5!O!_\e(B\\)" "\e$(5!*"p\e(B") | |
525 | ("\\(\e$(5!O#L\e(B\\)" "\e$(5#&"p\e(B") | |
526 | ("\\(\e$(5!O#K\e(B\\)" "\e$(5#*"p\e(B") | |
527 | ("\\(\e$(5!O#M\e(B\\)" "\e$(5#'"p\e(B") | |
bd09f27b | 528 | ;; If everything fails, "y" will connect to the front consonant. |
37cdc7ad | 529 | ("\\(\e$(5!h!M\e(B\\)" "\e$(5"]\e(B") |
4ed46869 KH |
530 | ) |
531 | "Alist of regexps of Devanagari character sequences vs composed characters.") | |
532 | ||
4ed46869 KH |
;; Register every rule of `devanagari-char-to-glyph-rules' in the
;; character code properties.  Each rule is filed under the first
;; character of the text found inside its \\( ... \\) group, and, for
;; glyph codes, a reverse glyph-to-characters entry is recorded.
(let ((remaining devanagari-char-to-glyph-rules))
  (while remaining
    (let* ((rule (car remaining))
           (pattern (car rule))
           (replacement (car (cdr rule)))
           ;; The character sequence this rule rewrites: the text
           ;; between \\( and \\) in the rule's regexp.
           (chars (progn
                    (string-match "\\\\(\\(.+\\)\\\\)" pattern)
                    (substring pattern (match-beginning 1) (match-end 1))))
           (char (string-to-char chars))
           (glyph (string-to-char replacement)))
      (setq remaining (cdr remaining))
      ;; Rules are appended rather than pushed, because the priority
      ;; of rules runs from the top of the list downwards.
      (put-char-code-property
       char 'char-to-glyph
       (append (get-char-code-property char 'char-to-glyph) (list rule)))
      ;; Only codes above the threshold character count as glyphs.
      ;; One glyph may correspond to several character sequences
      ;; (e.g. surrounding vowels in Tamil), but for Devanagari only
      ;; the first sequence seen for a glyph is recorded, so that one
      ;; glyph corresponds to exactly one character sequence.
      (if (and (< ?\e(5z\e(B glyph)
               (null (get-char-code-property glyph 'glyph-to-char)))
          (put-char-code-property
           glyph 'glyph-to-char
           (list (list replacement chars)))))))
4ed46869 KH |
559 | |
560 | ;; | |
37cdc7ad KH |
561 | ;; Function used in both characters-to-glyphs conversion and |
562 | ;; glyphs-to-characters conversion. | |
4ed46869 KH |
563 | ;; |
564 | ||
37cdc7ad KH |
(defun max-match-len (regexp-str)
  "Return the length REGEXP-STR has once its bracket expressions are collapsed.
Only the [...] construct of regexps is recognized.  Each [...] is
replaced by the last character found inside it, and the length of the
resulting string is returned."
  (let ((collapsed regexp-str)
        start end last-char)
    ;; The group is repeated, so after a match it holds the final
    ;; character before the closing bracket.
    (while (string-match "\\[\\([^\]]\\)+\\]" collapsed)
      (setq start (match-beginning 0)
            end (match-end 0)
            last-char (substring collapsed
                                 (match-beginning 1) (match-end 1)))
      (setq collapsed (concat (substring collapsed 0 start)
                              last-char
                              (substring collapsed end))))
    (length collapsed)))
576 | ||
;; Rewrite SRC-STR from left to right and return the new string.
;;   SRC-STR  string to convert.
;;   SYMBOL   name of the character code property holding the rule list
;;            for each character.
;;   SPECS    optional symbols; a rule carrying specs of its own is used
;;            only when it shares at least one of them with SPECS.
;; `intersection' comes from the `cl' library required near the top of
;; this file.
577 | (defun string-conversion-by-rule (src-str symbol &rest specs) | |
578 | " This function converts the SRC-STR to the new string according to | |
579 | the rules described in the each character's SYMBOL property. The | |
580 | rules are described in the forms of '((regexp str <specs>) ...), and | |
581 | the character sequence in the string which matches to 'regexp' are | |
582 | replaced with str. If SPECS are not specified, only rules with no | |
583 | <specs> would be applied. If SPECS are specified, then rules with no | |
584 | <specs> specified and rules with <spec> matches with SPECS would be | |
585 | applied. Rules are tested in the order of the list, thus more | |
586 | specific rules should be placed in front of less important rules. No | |
587 | composite character is supported, thus such must be converted by | |
588 | decompose-char before applying to this function. If rule is given in | |
589 | the forms of regexp '...\\(...\\)...', then inside the parenthesis is | |
590 | the subject of the match. Otherwise, the entire expression is the | |
591 | subject of the match." | |
4ed46869 KH |
592 | (let ((pos 0) |
593 | (dst-str "")) | |
;; POS is the current index into SRC-STR.  Each pass of the outer loop
;; either applies one rule at POS or copies the single character found
;; there into DST-STR.
594 | (while (< pos (length src-str)) | |
595 | (let ((found nil) | |
;; Candidate rules are those stored under SYMBOL for the character at POS.
596 | (rules (get-char-code-property | |
597 | (string-to-char | |
37cdc7ad | 598 | (substring src-str pos)) symbol))) |
4ed46869 KH |
599 | (while rules |
600 | (let* ((rule (car rules)) | |
37cdc7ad KH |
601 | (regexp (car rule)) |
602 | (replace-str (car (cdr rule))) | |
603 | (rule-specs (cdr (cdr rule))) | |
604 | search-pos) | |
;; Skip a rule that has specs of its own when none of them is in SPECS.
605 | (if (not (or (null rule-specs) | |
606 | (intersection specs rule-specs))) | |
607 | (setq rules (cdr rules)) | |
;; A regexp without a \\(...\\) group is wrapped in one and searched
;; from POS.  Otherwise the search starts before POS, backing up by the
;; `max-match-len' of the regexp text that precedes its first backslash,
;; so that this leading context can match characters before POS.
608 | (if (null (string-match "\\\\(.+\\\\)" regexp)) | |
609 | (progn | |
610 | (setq regexp (concat "\\(" regexp "\\)")) | |
611 | (setq search-pos pos)) | |
612 | (setq search-pos (- pos (max-match-len | |
613 | (substring regexp | |
614 | (string-match "^[^\\\\]*" regexp) | |
615 | (match-end 0)))))) | |
616 | (if (< search-pos 0) (setq search-pos 0)) | |
;; The rule applies only when group 1 of the match begins exactly at POS.
;; NOTE(review): only the first match found from SEARCH-POS is examined;
;; if its group 1 starts elsewhere, a later match at POS is not looked
;; for -- confirm this is intended.
617 | (if (string-match regexp src-str search-pos) | |
618 | (if (= (match-beginning 1) pos) | |
619 | (progn | |
620 | (setq dst-str (concat dst-str replace-str)) | |
621 | (setq rules nil) ; Get out of the loop. | |
622 | (setq found t) | |
623 | ;; proceed `pos' for replaced characters. | |
624 | (setq pos (match-end 1))) | |
625 | (setq rules (cdr rules))) | |
626 | (setq rules (cdr rules)))))) | |
4ed46869 KH |
;; No rule applied: copy the character at POS unchanged and advance POS
;; by the amount `char-bytes' reports for it.
627 | ;; proceed to next position |
628 | (if (not found) | |
629 | (let ((nextchar (string-to-char (substring src-str pos)))) | |
630 | (setq pos (+ pos | |
631 | (char-bytes (string-to-char (substring src-str pos))))) | |
632 | (setq dst-str (concat dst-str (char-to-string nextchar))))))) | |
633 | dst-str)) | |
634 | ||
37cdc7ad KH |
635 | |
636 | ;; | |
637 | ;; Convert Character Code to Glyph Code | |
638 | ;; | |
639 | ||
640 | ;;;###autoload | |
(defun char-to-glyph-devanagari (src-str &rest langs)
  "Return SRC-STR with its Devanagari characters converted to glyphs.
The work is done by `string-conversion-by-rule' with the
`char-to-glyph' rules; LANGS are handed over as the rule specs.
Ligatures and special rules are processed."
  (apply #'string-conversion-by-rule src-str 'char-to-glyph langs))
647 | ||
4ed46869 KH |
648 | ;; Example: |
649 | ;;(char-to-glyph-devanagari "\e$(5!X![!F!h!D!\\e(B") => "\e$(5!X!["F!D!\\e(B" | |
650 | ;;(char-to-glyph-devanagari "\e$(5!O!Z!V!h!=!h!O![!M\e(B") => ??? | |
651 | ||
652 | ;; | |
653 | ;; Phase 2: Compose Glyphs to form One Glyph. | |
654 | ;; | |
655 | ||
656 | ;; Each list consist of glyph, application-priority and application-direction. | |
657 | ;; | |
658 | ;; Glyphs will be ordered from low priority number to high priority number. | |
659 | ;; If application-priority is omitted, it is assumed to be 0. | |
660 | ;; If application-direction is omitted, it is assumed to be '(mr . ml). | |
661 | ||
662 | (defconst devanagari-composition-rules | |
37cdc7ad KH |
663 | '((?\e$(5!!\e(B 0 (tr . br)) |
664 | (?\e$(5!"\e(B 0 (mr . mr)) | |
665 | (?\e$(5!#\e(B 0) | |
4ed46869 KH |
666 | (?\e$(5!$\e(B 0) |
667 | (?\e$(5!%\e(B 0) | |
668 | (?\e$(5!&\e(B 0) | |
669 | (?\e$(5!'\e(B 0) | |
670 | (?\e$(5!(\e(B 0) | |
671 | (?\e$(5!)\e(B 0) | |
672 | (?\e$(5!*\e(B 0) | |
673 | (?\e$(5!+\e(B 0) | |
674 | (?\e$(5!,\e(B 0) | |
675 | (?\e$(5!-\e(B 0) | |
676 | (?\e$(5!.\e(B 0) | |
677 | (?\e$(5!/\e(B 0) | |
678 | (?\e$(5!0\e(B 0) | |
679 | (?\e$(5!1\e(B 0) | |
680 | (?\e$(5!2\e(B 0) | |
681 | (?\e$(5!3\e(B 0) | |
682 | (?\e$(5!4\e(B 0) | |
683 | (?\e$(5!5\e(B 0) | |
684 | (?\e$(5!6\e(B 0) | |
685 | (?\e$(5!7\e(B 0) | |
686 | (?\e$(5!8\e(B 0) | |
687 | (?\e$(5!9\e(B 0) | |
688 | (?\e$(5!:\e(B 0) | |
689 | (?\e$(5!;\e(B 0) | |
690 | (?\e$(5!<\e(B 0) | |
691 | (?\e$(5!=\e(B 0) | |
692 | (?\e$(5!>\e(B 0) | |
693 | (?\e$(5!?\e(B 0) | |
694 | (?\e$(5!@\e(B 0) | |
695 | (?\e$(5!A\e(B 0) | |
696 | (?\e$(5!B\e(B 0) | |
697 | (?\e$(5!C\e(B 0) | |
698 | (?\e$(5!D\e(B 0) | |
699 | (?\e$(5!E\e(B 0) | |
700 | (?\e$(5!F\e(B 0) | |
701 | (?\e$(5!G\e(B 0) | |
702 | (?\e$(5!H\e(B 0) | |
703 | (?\e$(5!I\e(B 0) | |
704 | (?\e$(5!J\e(B 0) | |
705 | (?\e$(5!K\e(B 0) | |
706 | (?\e$(5!L\e(B 0) | |
707 | (?\e$(5!M\e(B 0) | |
708 | (?\e$(5!N\e(B 0) | |
709 | (?\e$(5!O\e(B 0) | |
710 | (?\e$(5!P\e(B 0) | |
711 | (?\e$(5!Q\e(B 0) | |
712 | (?\e$(5!R\e(B 0) | |
713 | (?\e$(5!S\e(B 0) | |
714 | (?\e$(5!T\e(B 0) | |
715 | (?\e$(5!U\e(B 0) | |
716 | (?\e$(5!V\e(B 0) | |
717 | (?\e$(5!W\e(B 0) | |
718 | (?\e$(5!X\e(B 0) | |
719 | (?\e$(5!Y\e(B 0) | |
37cdc7ad KH |
720 | (?\e$(5!Z\e(B 0) |
721 | (?\e$(5![\e(B 0 (ml . mr)) | |
722 | (?\e$(5!\\e(B 0) | |
723 | (?\e$(5!]\e(B 0 (br . tr)) | |
724 | (?\e$(5!^\e(B 0 (br . tr)) | |
725 | (?\e$(5!_\e(B 0 (br . tr)) | |
726 | (?\e$(5!`\e(B 0 (mr . mr)) ; (tc . bc) | |
727 | (?\e$(5!a\e(B 0 (mr . mr)) | |
728 | (?\e$(5!b\e(B 0 (mr . mr)) | |
729 | (?\e$(5!c\e(B 0 (mr . mr)) | |
730 | (?\e$(5!d\e(B 0) | |
731 | (?\e$(5!e\e(B 0) | |
732 | (?\e$(5!f\e(B 0) | |
733 | (?\e$(5!g\e(B 0) | |
bd09f27b KH |
734 | (?\e$(5!h\e(B 0 (br . tr)) |
735 | (?\e$(5!i\e(B 0 (br . tr)) | |
4ed46869 KH |
736 | (?\e$(5!j\e(B 0) |
737 | (nil 0) | |
738 | (nil 0) | |
739 | (nil 0) | |
740 | (nil 0) | |
741 | (nil 0) | |
742 | (nil 0) | |
743 | (?\e$(5!q\e(B 0) | |
744 | (?\e$(5!r\e(B 0) | |
745 | (?\e$(5!s\e(B 0) | |
746 | (?\e$(5!t\e(B 0) | |
747 | (?\e$(5!u\e(B 0) | |
748 | (?\e$(5!v\e(B 0) | |
749 | (?\e$(5!w\e(B 0) | |
750 | (?\e$(5!x\e(B 0) | |
751 | (?\e$(5!y\e(B 0) | |
752 | (?\e$(5!z\e(B 0) | |
753 | (nil 0) | |
754 | (nil 0) | |
755 | (nil 0) | |
756 | (nil 0) | |
757 | (?\e$(5"!\e(B 0) | |
758 | (?\e$(5""\e(B 0) | |
759 | (?\e$(5"#\e(B 0) | |
760 | (?\e$(5"$\e(B 0) | |
761 | (?\e$(5"%\e(B 0) | |
762 | (?\e$(5"&\e(B 0) | |
763 | (?\e$(5"'\e(B 0) | |
764 | (?\e$(5"(\e(B 0) | |
765 | (?\e$(5")\e(B 0) | |
766 | (?\e$(5"*\e(B 0) | |
767 | (?\e$(5"+\e(B 0) | |
768 | (?\e$(5",\e(B 0) | |
769 | (?\e$(5"-\e(B 0) | |
770 | (?\e$(5".\e(B 0) | |
771 | (?\e$(5"/\e(B 0) | |
772 | (?\e$(5"0\e(B 0) | |
773 | (?\e$(5"1\e(B 0) | |
774 | (?\e$(5"2\e(B 0) | |
775 | (?\e$(5"3\e(B 0) | |
776 | (?\e$(5"4\e(B 0) | |
777 | (?\e$(5"5\e(B 0) | |
778 | (?\e$(5"6\e(B 0) | |
779 | (?\e$(5"7\e(B 0) | |
780 | (?\e$(5"8\e(B 0) | |
781 | (?\e$(5"9\e(B 0) | |
782 | (?\e$(5":\e(B 0) | |
783 | (?\e$(5";\e(B 0) | |
784 | (?\e$(5"<\e(B 0) | |
785 | (?\e$(5"=\e(B 0) | |
786 | (?\e$(5">\e(B 0) | |
787 | (?\e$(5"?\e(B 0) | |
788 | (?\e$(5"@\e(B 0) | |
789 | (?\e$(5"A\e(B 0) | |
790 | (?\e$(5"B\e(B 0) | |
791 | (?\e$(5"C\e(B 0) | |
792 | (?\e$(5"D\e(B 0) | |
793 | (?\e$(5"E\e(B 0) | |
794 | (?\e$(5"F\e(B 0) | |
795 | (?\e$(5"G\e(B 0) | |
796 | (?\e$(5"H\e(B 0) | |
797 | (?\e$(5"I\e(B 0) | |
798 | (?\e$(5"J\e(B 0) | |
799 | (?\e$(5"K\e(B 0) | |
800 | (?\e$(5"L\e(B 0) | |
801 | (?\e$(5"M\e(B 0) | |
802 | (?\e$(5"N\e(B 0) | |
803 | (?\e$(5"O\e(B 0) | |
804 | (?\e$(5"P\e(B 0) | |
805 | (?\e$(5"Q\e(B 0) | |
806 | (?\e$(5"R\e(B 0) | |
807 | (?\e$(5"S\e(B 0) | |
808 | (?\e$(5"T\e(B 0) | |
809 | (?\e$(5"U\e(B 0) | |
810 | (?\e$(5"V\e(B 0) | |
811 | (?\e$(5"W\e(B 0) | |
812 | (?\e$(5"X\e(B 0) | |
813 | (?\e$(5"Y\e(B 0) | |
814 | (?\e$(5"Z\e(B 0) | |
815 | (?\e$(5"[\e(B 0) | |
816 | (?\e$(5"\\e(B 0) | |
817 | (?\e$(5"]\e(B 0) | |
818 | (?\e$(5"^\e(B 0) | |
819 | (?\e$(5"_\e(B 0) | |
820 | (?\e$(5"`\e(B 0) | |
821 | (?\e$(5"a\e(B 0) | |
822 | (?\e$(5"b\e(B 0) | |
823 | (?\e$(5"c\e(B 0) | |
824 | (?\e$(5"d\e(B 0) | |
825 | (?\e$(5"e\e(B 0) | |
826 | (?\e$(5"f\e(B 0) | |
827 | (?\e$(5"g\e(B 0) | |
828 | (?\e$(5"h\e(B 0) | |
829 | (?\e$(5"i\e(B 0) | |
830 | (?\e$(5"j\e(B 0) | |
831 | (?\e$(5"k\e(B 0) | |
832 | (?\e$(5"l\e(B 0) | |
833 | (?\e$(5"m\e(B 0) | |
834 | (?\e$(5"n\e(B 0) | |
835 | (?\e$(5"o\e(B 0) | |
37cdc7ad KH |
836 | (?\e$(5"p\e(B 10 (mr . mr)) |
837 | (?\e$(5"q\e(B 0 (br . br)) | |
838 | (?\e$(5"r\e(B 0 (br . tr)) | |
4ed46869 KH |
839 | (?\e$(5"s\e(B 0) |
840 | (?\e$(5"t\e(B 0) | |
841 | (?\e$(5"u\e(B 0) | |
842 | (?\e$(5"v\e(B 0) | |
843 | (?\e$(5"w\e(B 0) | |
844 | (?\e$(5"x\e(B 0) | |
845 | (?\e$(5"y\e(B 0) | |
846 | (?\e$(5"z\e(B 0) | |
847 | (?\e$(5"{\e(B 0) | |
848 | (?\e$(5"|\e(B 0) | |
849 | (?\e$(5"}\e(B 0) | |
850 | (?\e$(5"~\e(B 0) | |
851 | (?\e$(5#!\e(B 0) | |
852 | (?\e$(5#"\e(B 0) | |
853 | (?\e$(5##\e(B 0) | |
854 | (?\e$(5#$\e(B 0) | |
855 | (?\e$(5#%\e(B 0) | |
856 | (?\e$(5#&\e(B 0) | |
857 | (?\e$(5#'\e(B 0) | |
858 | (?\e$(5#(\e(B 0) | |
859 | (?\e$(5#)\e(B 0) | |
860 | (?\e$(5#*\e(B 0) | |
861 | (?\e$(5#+\e(B 0) | |
862 | (?\e$(5#,\e(B 0) | |
863 | (?\e$(5#-\e(B 0) | |
864 | (?\e$(5#.\e(B 0) | |
865 | (?\e$(5#/\e(B 0) | |
866 | (?\e$(5#0\e(B 0) | |
867 | (?\e$(5#1\e(B 0) | |
868 | (?\e$(5#2\e(B 0) | |
869 | (?\e$(5#3\e(B 0) | |
870 | (?\e$(5#4\e(B 0) | |
871 | (?\e$(5#5\e(B 0) | |
872 | (?\e$(5#6\e(B 0) | |
873 | (?\e$(5#7\e(B 0) | |
874 | (?\e$(5#8\e(B 0) | |
875 | (?\e$(5#9\e(B 0) | |
876 | (?\e$(5#:\e(B 0) | |
877 | (?\e$(5#;\e(B 0) | |
878 | (?\e$(5#<\e(B 0) | |
879 | (?\e$(5#=\e(B 0) | |
880 | (?\e$(5#>\e(B 0) | |
881 | (?\e$(5#?\e(B 0) | |
882 | (?\e$(5#@\e(B 0) | |
883 | (?\e$(5#A\e(B 0) | |
884 | (?\e$(5#B\e(B 0) | |
885 | (?\e$(5#C\e(B 0) | |
886 | (?\e$(5#D\e(B 0) | |
887 | (?\e$(5#E\e(B 0) | |
888 | (?\e$(5#F\e(B 0) | |
889 | (?\e$(5#G\e(B 0) | |
890 | (?\e$(5#H\e(B 0) | |
891 | (?\e$(5#I\e(B 0) | |
892 | (?\e$(5#J\e(B 0) | |
37cdc7ad KH |
893 | (?\e$(5#K\e(B 0 (br . tr)) |
894 | (?\e$(5#L\e(B 0 (br . tr)) | |
895 | (?\e$(5#M\e(B 0 (br . tr)) | |
4ed46869 KH |
896 | (?\e$(5#N\e(B 0) |
897 | (?\e$(5#O\e(B 0) | |
898 | (?\e$(5#P\e(B 0) | |
899 | (?\e$(5#Q\e(B 0) | |
900 | (?\e$(5#R\e(B 0) | |
901 | (?\e$(5#S\e(B 0) | |
902 | (?\e$(5#T\e(B 0) | |
903 | (?\e$(5#U\e(B 0) | |
904 | (?\e$(5#V\e(B 0) | |
905 | (?\e$(5#W\e(B 0) | |
906 | (?\e$(5#X\e(B 0) | |
907 | (?\e$(5#Y\e(B 0) | |
908 | (?\e$(5#Z\e(B 0) | |
909 | (?\e$(5#[\e(B 0) | |
910 | (?\e$(5#\\e(B 0) | |
911 | (?\e$(5#]\e(B 0) | |
912 | (?\e$(5#^\e(B 0) | |
913 | (?\e$(5#_\e(B 0) | |
914 | (?\e$(5#`\e(B 0) | |
915 | (?\e$(5#a\e(B 0) | |
916 | (?\e$(5#b\e(B 0) | |
917 | (?\e$(5#c\e(B 0) | |
918 | (?\e$(5#d\e(B 0) | |
919 | (?\e$(5#e\e(B 0) | |
920 | (?\e$(5#f\e(B 0) | |
921 | (?\e$(5#g\e(B 0) | |
922 | (?\e$(5#h\e(B 0) | |
923 | (?\e$(5#i\e(B 0) | |
924 | (?\e$(5#j\e(B 0) | |
925 | (?\e$(5#k\e(B 0) | |
926 | (?\e$(5#l\e(B 0) | |
927 | (?\e$(5#m\e(B 0) | |
928 | (?\e$(5#n\e(B 0) | |
929 | (?\e$(5#o\e(B 0) | |
930 | (?\e$(5#p\e(B 0) | |
931 | (?\e$(5#q\e(B 0) | |
932 | (?\e$(5#r\e(B 0) | |
933 | (?\e$(5#s\e(B 0) | |
934 | (?\e$(5#t\e(B 0) | |
935 | (?\e$(5#u\e(B 0) | |
936 | (?\e$(5#v\e(B 0) | |
937 | (?\e$(5#w\e(B 0) | |
938 | (?\e$(5#x\e(B 0) | |
939 | (?\e$(5#y\e(B 0) | |
940 | (?\e$(5#z\e(B 0) | |
941 | (?\e$(5#{\e(B 0) | |
942 | (?\e$(5#|\e(B 0) | |
943 | (?\e$(5#}\e(B 0) | |
944 | (?\e$(5#~\e(B 0) | |
945 | (?\e$(5$!\e(B 0) | |
946 | (?\e$(5$"\e(B 0) | |
947 | (?\e$(5$#\e(B 0) | |
948 | (?\e$(5$$\e(B 0) | |
949 | (?\e$(5$%\e(B 0) | |
950 | (?\e$(5$&\e(B 0) | |
951 | (?\e$(5$'\e(B 0) | |
952 | (?\e$(5$(\e(B 0) | |
953 | (?\e$(5$)\e(B 0) | |
954 | (?\e$(5$*\e(B 0) | |
955 | (?\e$(5$+\e(B 0) | |
956 | (?\e$(5$,\e(B 0) | |
957 | (?\e$(5$-\e(B 0) | |
958 | (?\e$(5$.\e(B 0) | |
959 | (?\e$(5$/\e(B 0) | |
960 | (?\e$(5$0\e(B 0) | |
961 | (?\e$(5$1\e(B 0) | |
962 | (?\e$(5$2\e(B 0) | |
963 | (?\e$(5$3\e(B 0) | |
964 | (?\e$(5$4\e(B 0) | |
965 | (?\e$(5$5\e(B 0) | |
966 | (?\e$(5$6\e(B 0) | |
967 | (?\e$(5$7\e(B 0) | |
968 | (?\e$(5$8\e(B 0) | |
969 | (?\e$(5$9\e(B 0) | |
970 | (?\e$(5$:\e(B 0) | |
971 | (?\e$(5$;\e(B 0) | |
972 | (?\e$(5$<\e(B 0) | |
973 | (?\e$(5$=\e(B 0) | |
974 | (?\e$(5$>\e(B 0) | |
975 | (?\e$(5$?\e(B 0) | |
976 | (?\e$(5$@\e(B 0) | |
977 | (?\e$(5$A\e(B 0) | |
978 | (?\e$(5$B\e(B 0) | |
979 | (?\e$(5$C\e(B 0) | |
980 | (?\e$(5$D\e(B 0) | |
981 | (?\e$(5$E\e(B 0) | |
982 | (?\e$(5$F\e(B 0) | |
983 | (?\e$(5$G\e(B 0) | |
984 | (?\e$(5$H\e(B 0) | |
985 | (?\e$(5$I\e(B 0) | |
986 | (?\e$(5$J\e(B 0) | |
987 | (?\e$(5$K\e(B 0) | |
988 | (?\e$(5$L\e(B 0) | |
989 | (?\e$(5$M\e(B 0) | |
990 | (?\e$(5$N\e(B 0) | |
991 | (?\e$(5$O\e(B 0) | |
992 | (?\e$(5$P\e(B 0) | |
993 | (?\e$(5$Q\e(B 0) | |
994 | (?\e$(5$R\e(B 0) | |
995 | (?\e$(5$S\e(B 0) | |
996 | (?\e$(5$T\e(B 0) | |
997 | (?\e$(5$U\e(B 0) | |
998 | (?\e$(5$V\e(B 0) | |
999 | (?\e$(5$W\e(B 0) | |
1000 | (?\e$(5$X\e(B 0) | |
1001 | (?\e$(5$Y\e(B 0) | |
1002 | (?\e$(5$Z\e(B 0) | |
1003 | (?\e$(5$[\e(B 0) | |
1004 | (?\e$(5$\\e(B 0) | |
1005 | (?\e$(5$]\e(B 0) | |
1006 | (?\e$(5$^\e(B 0) | |
1007 | (?\e$(5$_\e(B 0) | |
1008 | (?\e$(5$`\e(B 0) | |
1009 | (?\e$(5$a\e(B 0) | |
1010 | (?\e$(5$b\e(B 0) | |
1011 | (?\e$(5$c\e(B 0) | |
1012 | (?\e$(5$d\e(B 0) | |
1013 | (?\e$(5$e\e(B 0) | |
1014 | (?\e$(5$f\e(B 0) | |
1015 | (?\e$(5$g\e(B 0) | |
1016 | (?\e$(5$h\e(B 0) | |
1017 | (?\e$(5$i\e(B 0) | |
1018 | (?\e$(5$j\e(B 0) | |
1019 | (?\e$(5$k\e(B 0) | |
1020 | (?\e$(5$l\e(B 0) | |
1021 | (?\e$(5$m\e(B 0) | |
1022 | (?\e$(5$n\e(B 0) | |
1023 | (?\e$(5$o\e(B 0) | |
1024 | (?\e$(5$p\e(B 0) | |
1025 | (?\e$(5$q\e(B 0) | |
1026 | (?\e$(5$r\e(B 0) | |
1027 | (?\e$(5$s\e(B 0) | |
1028 | (?\e$(5$t\e(B 0) | |
1029 | (?\e$(5$u\e(B 0) | |
1030 | (?\e$(5$v\e(B 0) | |
1031 | (?\e$(5$w\e(B 0) | |
1032 | (?\e$(5$x\e(B 0) | |
1033 | (?\e$(5$y\e(B 0) | |
1034 | (?\e$(5$z\e(B 0) | |
1035 | (?\e$(5${\e(B 0) | |
1036 | (?\e$(5$|\e(B 0) | |
1037 | (?\e$(5$}\e(B 0) | |
1038 | (?\e$(5$~\e(B 0) | |
1039 | )) | |
1040 | ||
1041 | ;; Determine composition priority and rule of the array of Glyphs. | |
1042 | ;; Sort the glyphs with their priority. | |
1043 | ||
(defun devanagari-reorder-glyphs-for-composition (glyph-alist)
  "Return the composition rules of the glyphs in GLYPH-ALIST by priority.
GLYPH-ALIST is a sequence of glyph codes.  Each glyph is replaced by
its entry in `devanagari-composition-rules', and the entries are
sorted from low priority number to high priority number with `sort'.
A glyph that has no entry is given the entry (GLYPH 0), and an entry
without a priority is treated as priority 0."
  (sort (mapcar
         (function
          (lambda (glyph)
            ;; Keep unknown glyphs instead of turning them into nil.
            (or (assq glyph devanagari-composition-rules)
                (list glyph 0))))
         glyph-alist)
        (function
         (lambda (x y)
           (< (or (car (cdr x)) 0)
              (or (car (cdr y)) 0))))))
1053 | ||
1054 | ;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e2\e$(6!XP"5@![\e1\e(B" | |
1055 | ||
(defun devanagari-compose-to-one-glyph (devanagari-string)
  "Compose the glyphs of DEVANAGARI-STRING and return the result as a string.
The glyphs are ordered by `devanagari-reorder-glyphs-for-composition'.
The first ordered glyph starts the composition list; every following
glyph is added preceded by its composition rule, which is the third
element of its entry or (mr . ml) when the entry has none.  The list
is passed through `devanagari-wide-to-narrow' and then given to
`compose-chars', unless it holds a single glyph, which is returned
with `char-to-string'.  An empty DEVANAGARI-STRING yields \"\"."
  (let ((ordered (devanagari-reorder-glyphs-for-composition
                  (string-to-vector devanagari-string))))
    (if (null ordered)
        ;; Nothing to compose.
        ""
      (let ((rest (cdr ordered))
            ;; Composition list, built in reverse order.
            (rev-list (list (car (car ordered))))
            entry rule cmp-glyph-list)
        (while rest
          (setq entry (car rest)
                rest (cdr rest))
          (setq rule (if (< 2 (length entry))
                         ;; composition with a specified rule
                         (car (cdr (cdr entry)))
                       ;; default composition
                       '(mr . ml)))
          (setq rev-list (cons (car entry) (cons rule rev-list))))
        ;; Before applying compose-chars, convert glyphs to
        ;; 1-column width if possible.
        (setq cmp-glyph-list (devanagari-wide-to-narrow (nreverse rev-list)))
        (if (= (length cmp-glyph-list) 1)
            (char-to-string (car cmp-glyph-list))
          (apply 'compose-chars cmp-glyph-list))))))
1076 | ||
bd09f27b KH |
1077 | ;; Utility function for Phase 2.5 |
1078 | ;; Check whether given glyph is a Devanagari vertical modifier or not. | |
1079 | ;; If it is a vertical modifier, whether it should be 1-column shape or not | |
1080 | ;; depends on previous non-vertical modifier. | |
1081 | ; return nil if it is not vertical modifier. | |
(defun devanagari-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is a Devanagari vertical modifier glyph.
The set of vertical modifiers is written as a `[...]' regexp and GLYPH,
converted to a string, is matched against it.  GLYPH is therefore never
interpreted as a regexp itself.  Return nil if GLYPH is not a vertical
modifier."
  (string-match "[\e$(5!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M\e(B]"
                (char-to-string glyph)))
1085 | ||
(defun devanagari-non-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is a Devanagari non-vertical modifier glyph.
The set of non-vertical modifiers is written as a `[...]' regexp and
GLYPH, converted to a string, is matched against it.  GLYPH is
therefore never interpreted as a regexp itself.  Return nil if GLYPH
is not in the set."
  (string-match "[\e$(5!Z![!\!d!e!f!g\e(B]"
                (char-to-string glyph)))
1089 | ||
4ed46869 KH |
1090 | |
1091 | ;; | |
1092 | ;; Phase 2.5 Convert Appropriate Character to 1-column shape. | |
1093 | ;; | |
1094 | ;; This is temporary and should be removed out when Emacs supports | |
1095 | ;; variable width characters. | |
1096 | ;; | |
1097 | ;; This will convert the composing glyphs (2 column glyphs) | |
1098 | ;; to narrow (1 column) glyphs if they exist. | |
1099 | ;; | |
1100 | ;; devanagari-wide-to-narrow-old converts glyphs simply. | |
1101 | ;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs | |
1102 | ;; with 2 column base-glyph. | |
1103 | ;; | |
1104 | ;; Execution Examples | |
bd09f27b KH |
1105 | ;;(devanagari-wide-to-narrow '(?\e$(5!3\e(B (ml . ml) ?\e$(5!a\e(B)) |
1106 | ;;(devanagari-wide-to-narrow '(?\e$(5!F\e(B (ml . ml) ?\e$(5!a\e(B)) | |
1107 | ||
1108 | ;(defun devanagari-wide-to-narrow (src-list) | |
1109 | ; (if (null src-list) '() | |
1110 | ; (cons | |
1111 | ; (if (and (numberp (car src-list)) | |
1112 | ; (cdr (assq (car src-list) devanagari-1-column-char))) | |
1113 | ; (cdr (assq (car src-list) devanagari-1-column-char)) | |
1114 | ; (car src-list)) | |
1115 | ; (devanagari-wide-to-narrow (cdr src-list))))) | |
4ed46869 KH |
1116 | |
(defun devanagari-wide-to-narrow (src-list)
  "Return SRC-LIST as converted by `devanagari-wide-to-narrow-iter'.
The conversion is started with the WIDE-P flag set to t."
  (let ((initially-wide t))
    (devanagari-wide-to-narrow-iter src-list initially-wide)))
1119 | ||
(defun devanagari-wide-to-narrow-iter (src-list wide-p)
  "Return a copy of SRC-LIST with glyphs replaced by 1-column forms.
The 1-column form of a glyph is the cdr of its entry in
`devanagari-1-column-char'.  Elements that are not numbers are copied
unchanged.  WIDE-P is the initial value of a flag that is carried
from element to element:

- A vertical modifier is narrowed only while the flag is nil and it
  has a 1-column form; the flag then stays nil.  Otherwise it is kept
  and the flag becomes t.
- A non-vertical modifier is narrowed whenever it has a 1-column
  form; the flag is left alone.
- Any other glyph is narrowed if it has a 1-column form, which sets
  the flag to nil; otherwise it is kept and the flag becomes t."
  (let ((rest src-list)
        (wide wide-p)
        (result nil)
        item narrow)
    (while rest
      (setq item (car rest)
            rest (cdr rest))
      (if (not (numberp item))
          ;; not glyph code
          (setq result (cons item result))
        (setq narrow (cdr (assq item devanagari-1-column-char)))
        (cond
         ;; vertical modifier glyph
         ((devanagari-vertical-modifier-p item)
          (if (and (null wide) narrow)
              (setq result (cons narrow result)
                    wide nil)
            (setq result (cons item result)
                  wide t)))
         ;; nonvertical modifier glyph
         ((devanagari-non-vertical-modifier-p item)
          (setq result (cons (or narrow item) result)))
         ;; normal glyph
         (narrow
          (setq result (cons narrow result)
                wide nil))
         (t
          (setq result (cons item result)
                wide t)))))
    (nreverse result)))
1148 | ||
4ed46869 KH |
1149 | |
1150 | ;; | |
1151 | ;; Summary | |
1152 | ;; | |
1153 | ||
4ed46869 KH |
1154 | ;; |
1155 | ;; Decomposition of composite font. | |
1156 | ;; | |
1157 | ||
(defun devanagari-normalize-narrow-glyph (charlist)
  "Return a copy of CHARLIST with narrow glyphs replaced by wide ones.
An element that occurs as the cdr of an entry in
`devanagari-1-column-char' is replaced by the car of that entry.
All other elements are kept as they are."
  (mapcar (function
           (lambda (narrow-or-wide)
             (or (car (rassoc narrow-or-wide devanagari-1-column-char))
                 narrow-or-wide)))
          charlist))
1163 | ||
1164 | (defvar devanagari-decomposition-rules | |
1165 | '( | |
37cdc7ad | 1166 | (?\e$(5"p\e(B -10) |
4ed46869 KH |
1167 | ) |
1168 | ) | |
1169 | ||
37cdc7ad KH |
1170 | (defun devanagari-reorder-glyphs-for-decomposition (glyphlist) |
1171 | "This function re-orders glyph list for decomposition." | |
4ed46869 KH |
1172 | (sort glyphlist |
1173 | '(lambda (x y) | |
1174 | (let ((xx (assoc x devanagari-decomposition-rules)) | |
1175 | (yy (assoc y devanagari-decomposition-rules))) | |
1176 | (if (null xx) (setq xx 0)) | |
1177 | (if (null yy) (setq yy 0)) | |
1178 | (< xx yy))))) | |
1179 | ||
37cdc7ad KH |
1180 | (defun devanagari-decompose-char (glyph) |
1181 | "This function decomposes one Devanagari composite glyph to | |
1182 | basic Devanagari characters as a string." | |
1183 | (let ((glyphlist | |
1184 | (if (eq (car (split-char glyph)) 'composition) | |
1185 | (string-to-list (decompose-composite-char glyph)) | |
1186 | (list glyph)))) | |
4ed46869 | 1187 | (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist)) |
37cdc7ad KH |
1188 | (setq glyphlist (devanagari-reorder-glyphs-for-decomposition glyphlist)) |
1189 | (string-conversion-by-rule | |
1190 | (mapconcat 'char-to-string glyphlist "") 'glyph-to-char))) | |
4ed46869 KH |
1191 | |
1192 | ;;;###autoload | |
(defun devanagari-decompose-string (str)
  "Return STR with each glyph decomposed to basic Devanagari characters.
STR is consumed from the front: the leading character is passed to
`devanagari-decompose-char' and then dropped, using `char-bytes' as
the amount to drop, until nothing is left.  The decomposed pieces are
concatenated in order."
  (let ((remaining str)
        (pieces nil)
        head)
    (while (not (equal remaining ""))
      (setq head (string-to-char remaining)
            remaining (substring remaining (char-bytes head))
            pieces (cons (devanagari-decompose-char head) pieces)))
    (apply (function concat) (nreverse pieces))))
1204 | ||
1205 | ;;;###autoload | |
(defun devanagari-decompose-region (from to)
  "Decompose the characters between FROM and TO in the current buffer.
Each character matched by the regexp \".\" is replaced by the result
of `devanagari-decompose-string' applied to it."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward "." nil t)
      ;; Remember the match bounds before calling the converter.
      (let* ((char-start (match-beginning 0))
             (char-end (match-end 0))
             (replacement (devanagari-decompose-string
                           (buffer-substring char-start char-end))))
        (delete-region char-start char-end)
        (insert replacement)))))
1216 | ||
37cdc7ad KH |
1217 | ;;; |
1218 | ;;; Composition | |
1219 | ;;; | |
1220 | ||
1221 | ;;;###autoload | |
(defun devanagari-compose-string (str &rest langs)
  "Return STR with its Devanagari glyph units composed.
STR is first decomposed with `devanagari-decompose-string'.  Every
stretch matching `devanagari-composite-glyph-unit' is then converted
by `char-to-glyph-devanagari', which also receives LANGS, and composed
by `devanagari-compose-to-one-glyph'.  Text between the matches is
copied unchanged."
  (let ((pending (devanagari-decompose-string str))
        (pieces nil)
        unit-start unit-end)
    (while (string-match devanagari-composite-glyph-unit pending)
      ;; Save the bounds; the conversion below does its own matching.
      (setq unit-start (match-beginning 0)
            unit-end (match-end 0))
      (setq pieces
            (cons (devanagari-compose-to-one-glyph
                   (apply #'char-to-glyph-devanagari
                          (substring pending unit-start unit-end)
                          langs))
                  (cons (substring pending 0 unit-start) pieces)))
      (setq pending (substring pending unit-end)))
    (apply #'concat (nreverse (cons pending pieces)))))
4ed46869 | 1238 | |
37cdc7ad KH |
1239 | ;;;###autoload |
(defun devanagari-compose-region (from to &rest langs)
  "Compose the Devanagari glyph units between FROM and TO.
Every match of `devanagari-composite-glyph-unit' in the region is
converted by `char-to-glyph-devanagari', which also receives LANGS,
composed by `devanagari-compose-to-one-glyph', and replaced in the
buffer by the result."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward devanagari-composite-glyph-unit nil t)
      (let* ((unit-start (match-beginning 0))
             (unit-end (match-end 0))
             (unit-text (buffer-substring unit-start unit-end))
             (composed (devanagari-compose-to-one-glyph
                        (apply #'char-to-glyph-devanagari unit-text langs))))
        (delete-region unit-start unit-end)
        (insert composed)))))
4ed46869 KH |
1254 | |
1255 | ;; For pre-write and post-read conversion | |
1256 | ||
1257 | ;;;###autoload | |
(defun devanagari-compose-from-is13194-region (from to)
  "Compose IS 13194 characters between FROM and TO to Devanagari characters.
The region is converted with `indian-to-devanagari-region' and the
result is then composed with `devanagari-compose-region'."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: convert the whole narrowed region.
    (indian-to-devanagari-region (point-min) (point-max))
    ;; Step 2: compose what step 1 produced.
    (devanagari-compose-region (point-min) (point-max))))
1265 | ||
cefa701a KH |
1266 | ;;;###autoload |
(defun in-is13194-devanagari-post-read-conversion (len)
  "Compose the IS 13194 text of length LEN that starts at point.
The text from point to point plus LEN is handed to
`devanagari-compose-from-is13194-region'."
  (let* ((start (point))
         (end (+ start len)))
    (devanagari-compose-from-is13194-region start end)))
1270 | ||
4ed46869 KH |
1271 | ;;;###autoload |
(defun devanagari-decompose-to-is13194-region (from to)
  "Decompose Devanagari characters between FROM and TO to IS 13194 characters.
The region is decomposed with `devanagari-decompose-region' and the
result is then converted with `devanagari-to-indian-region'."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: decompose the whole narrowed region.
    (devanagari-decompose-region (point-min) (point-max))
    ;; Step 2: convert what step 1 produced.
    (devanagari-to-indian-region (point-min) (point-max))))
1279 | ||
cefa701a KH |
1280 | ;;;###autoload |
(defun in-is13194-devanagari-pre-write-conversion (from to)
  "Put the IS 13194 form of the text to be written into a work buffer.
The buffer \" *devanagari-work*\" is made current and emptied.  If FROM
is a string, it is inserted there; otherwise the text between FROM and
TO of the buffer that was current on entry is inserted.  The inserted
text is converted with `devanagari-decompose-to-is13194-region'.  The
work buffer is left current and nil is returned."
  (let ((source-buf (current-buffer)))
    (set-buffer (get-buffer-create " *devanagari-work*"))
    (erase-buffer)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source-buf from to)))
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))
37cdc7ad KH |
1292 | |
1293 | ;; For input/output of ITRANS | |
1294 | ||
1295 | ;;;###autoload | |
(defun devanagari-encode-itrans-region (from to)
  "Encode the Devanagari text between FROM and TO for ITRANS output.
The region is converted with `devanagari-decompose-to-is13194-region'
and the result is then passed to `indian-encode-itrans-region'."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: decompose the narrowed region to IS 13194.
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Step 2: encode what step 1 produced.
    (indian-encode-itrans-region (point-min) (point-max))))
1302 | ||
1303 | ;;;###autoload | |
(defun devanagari-decode-itrans-region (from to)
  "Decode the ITRANS text between FROM and TO to Devanagari.
The region is converted with `indian-decode-itrans-region' and the
result is then passed to `devanagari-compose-from-is13194-region'."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: decode the narrowed region.
    (indian-decode-itrans-region (point-min) (point-max))
    ;; Step 2: compose what step 1 produced.
    (devanagari-compose-from-is13194-region (point-min) (point-max))))
1310 | ||
c47ec7c4 RS |
1311 | ;; Test comment. |
1312 | ||
4ed46869 | 1313 | ;; |
650e8505 | 1314 | (provide 'devan-util) |
4ed46869 | 1315 | |
37cdc7ad | 1316 | ;;; devan-util.el ends here