Initial revision
[bpt/emacs.git] / lisp / language / devan-util.el
1 ;;; devan-util.el --- Support for Devanagari Script Composition
2
3 ;; Copyright (C) 1996 Free Software Foundation, Inc.
4
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
6
7 ;; Keywords: multilingual, Indian, Devanagari
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to
23 ;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24
25 ;;; Commentary:
26
27 ;; History:
28 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
29
30 ;; Devanagari script composition rules and related programs.
31
32 ;;; Code:
33
34 ;;;
35 ;;; Steps toward composition of Devanagari Characters.
36 ;;;
37
38 ;;; Basic functions.
39
40 ;;;###autoload
(defun indian-to-devanagari (ch)
  "Return the Devanagari basic character corresponding to CH.
If CH belongs to the `indian-is13194' charset, return the
`indian-2-column' character whose first position code is 0x21 and
whose second position code is the IS 13194 code of CH.
Any other character is returned unchanged."
  (let ((parts (split-char ch)))
    (cond ((eq (car parts) 'indian-is13194)
           (make-char 'indian-2-column ?\x21 (car (cdr parts))))
          (t ch))))
47
48 ;;;###autoload
(defun devanagari-to-indian (ch)
  "Return the IS 13194 character corresponding to Devanagari character CH.
If CH belongs to the `indian-2-column' charset and its first position
code is 0x21 (i.e. it is a basic character, not a composed glyph),
return the `indian-is13194' character that has CH's second position
code.  Any other character is returned unchanged."
  ;; Only the split code list is needed; the former `charset' and
  ;; `code-h' bindings were never read.
  (let ((charcodes (split-char ch)))
    (if (and (eq (car charcodes) 'indian-2-column)
             (= (nth 1 charcodes) ?\x21))
        (make-char 'indian-is13194 (nth 2 charcodes))
      ch)))
58
59 ;;;###autoload
(defun indian-to-devanagari-region (from to)
  "Convert IS 13194 characters between FROM and TO to Devanagari basic characters.
Each character matched by the regexp \"\\\\cd\" is replaced in the
buffer by the result of `indian-to-devanagari'.
Interactively, operate on the region.  This command moves point."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward "\\cd" nil t)
      ;; The regexp matches exactly one character, so the match start
      ;; is the position just before the character to replace.
      (let ((start (match-beginning 0))
            (converted (indian-to-devanagari (preceding-char))))
        (delete-region start (point))
        (insert converted)))))
70
71 ;;;###autoload
(defun devanagari-to-indian-region (from to)
  "Convert Devanagari basic characters between FROM and TO to IS 13194 characters.
Each character matched by the regexp \"\\\\cD\" is replaced in the
buffer by the result of `devanagari-to-indian'.
Interactively, operate on the region.  This command moves point."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; "\\cD" is the category of Devanagari character codes.
    (while (re-search-forward "\\cD" nil t)
      ;; The regexp matches exactly one character, so the match start
      ;; is the position just before the character to replace.
      (let ((start (match-beginning 0))
            (converted (devanagari-to-indian (preceding-char))))
        (delete-region start (point))
        (insert converted)))))
82
83 ;;;###autoload
(defun indian-to-devanagari-string (str)
  "Return a copy of STR with IS 13194 characters converted to Devanagari.
Each character of STR is passed through `indian-to-devanagari';
characters which that function does not convert are copied as they are.
An empty STR yields an empty string."
  ;; The converted characters are collected in reverse order and joined
  ;; once at the end, instead of re-concatenating the whole result for
  ;; every character.  The former `pos' binding was never used.
  (let ((src str)
        (pieces nil)
        char)
    (while (not (equal src ""))
      (setq char (string-to-char src))
      ;; Drop the character just read from the front of SRC.
      (setq src (substring src (char-bytes char)))
      (setq pieces (cons (char-to-string (indian-to-devanagari char))
                         pieces)))
    (apply 'concat (nreverse pieces))))
92
93 ;; Phase 0 - Determine whether the characters can be composed.
94 ;;
95 ;;;
96 ;;; Regular expressions to split characters for composition.
97 ;;;
98 ;;
99 ;; Indian script word contains one or more syllables.
100 ;; In BNF, it can be expressed as follows:
101 ;;
102 ;; Word ::= {Syllable} [Cons-Syllable]
103 ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
104 ;; Vowel-Syllable ::= V[D]
105 ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
106 ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] Pure-Cons
107 ;; Pure-Cons ::= Full-Cons H
108 ;; Full-Cons ::= C [N]
109 ;;
110 ;; {} repeat, [] optional
111 ;;
112 ;; C - Consonant (\e$(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E\e(B
113 ;; \e$(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X\e(B)
114 ;; N - Nukta (\e$(5!i\e(B)
115 ;; H - Halant(\e$(5!h\e(B)
116 ;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2\e(B)
;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu, Visarg (\e$(5!!!"!#\e(B)
118 ;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g\e(B)
119 ;;
120 ;; In Emacs, one syllable of Indian language is considered to be one
121 ;; composite glyph. If we expand the above expression, it would be:
122 ;;
123 ;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] [D] | V [D]
124 ;;
;; Therefore, in the worst case, the consonant syllable will consist of
;; the following characters.
127 ;;
128 ;; C N H C N H C N H C N M D
129 ;;
130 ;; On the other hand, incomplete consonant syllable before inputting
131 ;; base consonant must satisfy the following condition:
132 ;;
133 ;; [C [N] H] [C [N] H] C [N] H
134 ;;
135 ;; This is acceptable BEFORE proper consonant-syllable is input. The
136 ;; string which doesn't match with the above expression is invalid and
137 ;; thus must be fixed.
138 ;;
;; Note:
;; A third case can also be considered: a syllable which is already
;; complete and to which no further code can be appended.
142 ;;
143 ;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] D
144 ;;
145 ;; However, to make editing possible even in this condition, we will
146 ;; not consider about this case.
147
(defconst devanagari-cons-syllable-examine
  ;; Built from the pieces of the BNF in the commentary:
  ;;   [[C [N] H] [C [N] H] C [N] H] C [N] [M] [D]
  (let ((pure-cons "[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B")) ; C [N] H
    (concat "\\(\\(" pure-cons "\\)?\\(" pure-cons "\\)?" pure-cons "\\)?"
            ;; Base consonant: C [N]
            "[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?"
            ;; Optional matra: [M]
            "[\e$(5!Z\e(B-\e$(5!g\e(B]?"
            ;; Optional vowel modifier: [D].  Visarg is accepted here as
            ;; well, in agreement with `devanagari-vowel-syllable-examine';
            ;; previously only Chandrabindu and Anuswar were accepted.
            "[\e$(5!!!"!#\e(B]?"))
  "Regexp matching one Devanagari consonant syllable.
The syllable consists of up to three vowel-suppressed consonants
\(consonant, optional Nukta, Halant), a base consonant with optional
Nukta, an optional Matra and an optional vowel modifier.")
151
(defconst devanagari-cons-syllable-incomplete-examine
  ;; [C [N] H] [C [N] H] C [N] H, anchored at the end of the text.
  (let ((pure-cons "[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B")) ; C [N] H
    (concat "\\(" pure-cons "\\)?"
            "\\(" pure-cons "\\)?"
            pure-cons
            "$"))
  "Regexp matching one incomplete Devanagari consonant syllable.
That is one to three vowel-suppressed consonants (consonant, optional
Nukta, Halant) at the end of the text, with no base consonant yet.")
155
(defconst devanagari-vowel-syllable-examine
  (concat
   ;; V: one independent vowel.
   "[\e$(5!$\e(B-\e$(5!2\e(B]"
   ;; [D]: an optional vowel modifier.
   "[\e$(5!!!"!#\e(B]?")
  "Regexp matching one Devanagari vowel syllable.
That is an independent vowel optionally followed by one vowel modifier.")
159
160 ;;
;; Also, digits and virams should be processed separately from syllables.
162 ;;
163 ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and
164 ;; OM is obtained by Nukta after Chandrabindu
165 ;;
(defconst devanagari-digit-viram-examine
  "[\e$(5!q\e(B-\e$(5!z!j\e(B]"
  "Regexp matching one Devanagari digit or Viram sign.
These are handled as units of their own rather than as syllables.")
(defconst devanagari-other-sign-examine
  "[\e$(5!!!j\e(B]\e$(5!i\e(B"
  "Regexp matching Chandrabindu or Viram followed by Nukta.
In IS 13194 these two-character sequences stand for OM and Avagrah.")
170
(defconst devanagari-composite-glyph-unit-examine
  ;; Wrap every alternative in its own group and join them with "\\|".
  ;; The order of the alternatives is significant and is kept as is:
  ;; the incomplete consonant syllable is tried first.
  (mapconcat (function (lambda (regexp) (concat "\\(" regexp "\\)")))
             (list devanagari-cons-syllable-incomplete-examine
                   devanagari-vowel-syllable-examine
                   devanagari-digit-viram-examine
                   devanagari-cons-syllable-examine
                   devanagari-other-sign-examine)
             "\\|")
  "Regexp matching a Devanagari string to be composed to form one glyph.
The groups are, in order: incomplete consonant syllable, vowel
syllable, digit or Viram, consonant syllable, other sign.")
178
179 ;;(put-charset-property charset-devanagari-1-column
180 ;; 'char-to-glyph 'devanagari-compose-string)
181 ;;(put-charset-property charset-devanagari-2-column
182 ;; 'char-to-glyph 'devanagari-compose-string)
183
184 ;; Sample
185 ;;
186 ;;(string-match devanagari-cons-syllable-examine "\e$(5!X![\e(B") => 0
187 ;;(string-match devanagari-cons-syllable-examine "\e$(5!F!h!D!\\e(B") => 0
188 ;;(string-match devanagari-cons-syllable-examine "\e$(5!X![!F!h!D!\\e(B") => 0
189
190 ;;
191 ;; Steps toward the composition
192 ;; Converting Character Code to Composite Glyph.
193 ;;
194 ;; Example : \e$(5!X![\e(B/\e$(5!F!h!D!\\e(B
195 ;;
196 ;; First, convert Characters to appropriate glyphs.
197 ;;
198 ;; => \e$(5!X![\e(B/\e$(5"F!D!\\e(B
199 ;;
200 ;; Then, determine the base glyph, apply-orders and apply-rules.
201 ;;
202 ;; => \e$(5!X\e(B (ml.mr) \e$(5![\e(B / \e$(5!D\e(B (ml.mr) \e$(5"F\e(B (mr ml) \e$(5!\\e(B
203 ;;
204 ;; Finally, convert 2-column glyphs to 1-column glyph
205 ;; if such a glyph exist.
206 ;;
207 ;; => \e$(6![\e(B (ml.mr) \e$(6!X\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B
208 ;;
209 ;; Compose the glyph.
210 ;;
211 ;; => \e2\e$(6!X@![\e1\e(B/\e2\e$(6!D@"FP!\\e1\e(B
212 ;; => \e2\e$(6!X@![\e1\e2!D@"FP!\\e1\e(B
213 ;;
214
215 ;;
216 ;; Phase 1: Converting Character Code to Glyph Code.
217 ;;
218 ;;
219 ;; IMPORTANT:
;; There may be many rules which you may want to suppress.
221 ;; In that case, please comment out that rule.
222 ;;
223 ;; RULES WILL BE EVALUATED FROM FIRST TO LAST.
224 ;; PUT MORE SPECIFIC RULES FIRST.
225 ;;
226 ;; TO DO:
;; Prepare a specific list of rules for each language
;; which adopts the Devanagari script.
229 ;;
230
231
;; Each element is (REGEXP . GLYPH-STRING).  The first \\( \\) group of
;; REGEXP is the character sequence to be replaced by GLYPH-STRING; text
;; outside the group is context which must be present but is kept.
;; The rules are indexed by the first character of that group and are
;; tried in the order given here, so an earlier rule wins over a later
;; rule whose group starts with the same characters.
(defconst devanagari-char-to-glyph-rules
  '(
    ;; special form for "ru".
    ("\\(\e$(5!O!]\e(B\\)" . "\e$(5",\e(B")
    ("\\(\e$(5!O!^\e(B\\)" . "\e$(5"-\e(B")
    ("\\(\e$(5!P!]\e(B\\)" . "\e$(5".\e(B")
    ("\\(\e$(5!P!^\e(B\\)" . "\e$(5"/\e(B")

    ;; `r' at the top of syllable and followed by other consonants.
    ;; ("[^\e$(5!h\e(B]\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"p\e(B")
    ("^\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"p\e(B")

    ;; If "r" is preceded by the vowel-suppressed consonant
    ;; (especially those with vertical line), it will be written as
    ;; slanted line below the preceding consonant character. Some of
    ;; them are pre-composed as one glyph.
    ;;
    ;; NOTE(review): these short rules precede the longer ligature and
    ;; half-form rules which begin with the same characters (for example
    ;; the three-character rule for \e$(5!3!h!O\e(B comes before the ligature rule
    ;; for \e$(5!3!h!O!h!M\e(B), so the longer rules look unreachable -- confirm
    ;; whether this ordering is intended.

    ("\\(\e$(5!:!i!h!O\e(B\\)" . "\e$(5"!\e(B")
    ("\\(\e$(5!I!i!h!O\e(B\\)" . "\e$(5""\e(B")
    ("\\(\e$(5!3!h!O\e(B\\)" . "\e$(5"#\e(B")
    ("\\(\e$(5!:!h!O\e(B\\)" . "\e$(5"$\e(B")
    ("\\(\e$(5!B!h!O\e(B\\)" . "\e$(5"%\e(B")
    ("\\(\e$(5!H!h!O\e(B\\)" . "\e$(5"&\e(B")
    ("\\(\e$(5!I!h!O\e(B\\)" . "\e$(5"'\e(B")
    ("\\(\e$(5!U!h!O\e(B\\)" . "\e$(5"(\e(B")
    ("\\(\e$(5!W!h!O\e(B\\)" . "\e$(5")\e(B")

    ;; Ligature Rules
    ("\\(\e$(5!3!h!B!h!O!h!M\e(B\\)" . "\e$(5$!\e(B")
    ("\\(\e$(5!3!h!B!h!T\e(B\\)" . "\e$(5$"\e(B")
    ("\\(\e$(5!3!h!B!h!M\e(B\\)" . "\e$(5$#\e(B")
    ("\\(\e$(5!3!h!F!h!M\e(B\\)" . "\e$(5$$\e(B")
    ("\\(\e$(5!3!h!O!h!M\e(B\\)" . "\e$(5$%\e(B")
    ("\\(\e$(5!3!h!T!h!M\e(B\\)" . "\e$(5$&\e(B")
    ("\\(\e$(5!3!h!3\e(B\\)" . "\e$(5$'\e(B")
    ("\\(\e$(5!3!h!B\e(B\\)" . "\e$(5$(\e(B")
    ("\\(\e$(5!3!h!F\e(B\\)" . "\e$(5$)\e(B")
    ("\\(\e$(5!3!h!L\e(B\\)" . "\e$(5$*\e(B")
    ("\\(\e$(5!3!h!M\e(B\\)" . "\e$(5$+\e(B")
    ("\\(\e$(5!3!h!Q\e(B\\)" . "\e$(5$,\e(B")
    ("\\(\e$(5!3!h!T\e(B\\)" . "\e$(5$-\e(B")
    ("\\(\e$(5!3!h!V\e(B\\)" . "\e$(5$.\e(B")
    ("\\(\e$(5!6!h!F\e(B\\)" . "\e$(5$/\e(B")
    ("\\(\e$(5!7!h!3!h!B!h!M\e(B\\)" . "\e$(5$0\e(B")
    ("\\(\e$(5!7!h!3!h!V!h!T\e(B\\)" . "\e$(5$1\e(B")
    ("\\(\e$(5!7!h!3!h!B\e(B\\)" . "\e$(5$2\e(B")
    ("\\(\e$(5!7!h!3!h!V\e(B\\)" . "\e$(5$3\e(B")
    ("\\(\e$(5!7!h!6!h!O\e(B\\)" . "\e$(5$4\e(B")
    ("\\(\e$(5!7!h!3!h!M\e(B\\)" . "\e$(5$5\e(B")
    ("\\(\e$(5!7!h!4!h!M\e(B\\)" . "\e$(5$6\e(B")
    ("\\(\e$(5!7!h!5!h!M\e(B\\)" . "\e$(5$7\e(B")
    ("\\(\e$(5!7!h!6!h!M\e(B\\)" . "\e$(5$8\e(B")
    ("\\(\e$(5!7!h!3\e(B\\)" . "\e$(5$9\e(B")
    ("\\(\e$(5!7!h!4\e(B\\)" . "\e$(5$:\e(B")
    ("\\(\e$(5!7!h!5\e(B\\)" . "\e$(5$;\e(B")
    ("\\(\e$(5!7!h!6\e(B\\)" . "\e$(5$<\e(B")
    ("\\(\e$(5!7!h!7\e(B\\)" . "\e$(5$=\e(B")
    ("\\(\e$(5!7!h!F\e(B\\)" . "\e$(5$>\e(B")
    ("\\(\e$(5!7!h!L\e(B\\)" . "\e$(5$?\e(B")
    ("\\(\e$(5!7!h!M\e(B\\)" . "\e$(5$@\e(B")
    ("\\(\e$(5!8!h!8\e(B\\)" . "\e$(5$A\e(B")
    ("\\(\e$(5!8!h!<\e(B\\)" . "\e$(5$B\e(B")
    ("\\(\e$(5!9!h!M\e(B\\)" . "\e$(5$C\e(B")
    ;; NOTE(review): the next pattern is identical to one in the "r"
    ;; section above, so the earlier rule is always the one applied.
    ("\\(\e$(5!:!h!O\e(B\\)" . "\e$(5$D\e(B")
    ;; NOTE(review): this pattern ends in two Halants, unlike every
    ;; other ligature rule -- possibly a typo; confirm.
    ("\\(\e$(5!:!h!h\e(B\\)" . "\e$(5$E\e(B")
    ("\\(\e$(5!<!h!8\e(B\\)" . "\e$(5$F\e(B")
    ("\\(\e$(5!<!h!:\e(B\\)" . "\e$(5$G\e(B")
    ("\\(\e$(5!=!h!3\e(B\\)" . "\e$(5$H\e(B")
    ("\\(\e$(5!=!h!=\e(B\\)" . "\e$(5$I\e(B")
    ("\\(\e$(5!=!h!>\e(B\\)" . "\e$(5$J\e(B")
    ("\\(\e$(5!=!h!M\e(B\\)" . "\e$(5$K\e(B")
    ("\\(\e$(5!>!h!M\e(B\\)" . "\e$(5$L\e(B")
    ("\\(\e$(5!?!h!5!h!M\e(B\\)" . "\e$(5$M\e(B")
    ("\\(\e$(5!?!h!6!h!O\e(B\\)" . "\e$(5$N\e(B")
    ("\\(\e$(5!?!h!O!h!M\e(B\\)" . "\e$(5$O\e(B")
    ("\\(\e$(5!?!h!5\e(B\\)" . "\e$(5$P\e(B")
    ("\\(\e$(5!?!h!6\e(B\\)" . "\e$(5$Q\e(B")
    ("\\(\e$(5!?!h!?\e(B\\)" . "\e$(5$R\e(B")
    ("\\(\e$(5!?!h!L\e(B\\)" . "\e$(5$S\e(B")
    ("\\(\e$(5!?!h!M\e(B\\)" . "\e$(5$T\e(B")
    ("\\(\e$(5!@!h!M\e(B\\)" . "\e$(5$`\e(B")
    ("\\(\e$(5!B!h!B\e(B\\)" . "\e$(5$a\e(B")
    ("\\(\e$(5!B!h!F\e(B\\)" . "\e$(5$b\e(B")
    ("\\(\e$(5!D!h!D!h!M\e(B\\)" . "\e$(5$c\e(B")
    ("\\(\e$(5!D!h!E!h!M\e(B\\)" . "\e$(5$d\e(B")
    ("\\(\e$(5!D!h!K!h!M\e(B\\)" . "\e$(5$e\e(B")
    ("\\(\e$(5!D!h!O!h!M\e(B\\)" . "\e$(5$f\e(B")
    ("\\(\e$(5!D!h!T!h!M\e(B\\)" . "\e$(5$g\e(B")
    ("\\(\e$(5!D!h!5!h!O\e(B\\)" . "\e$(5$h\e(B")
    ("\\(\e$(5!D!h!6!h!O\e(B\\)" . "\e$(5$i\e(B")
    ("\\(\e$(5!D!h!D!h!T\e(B\\)" . "\e$(5$j\e(B")
    ("\\(\e$(5!D!h!E!h!T\e(B\\)" . "\e$(5$k\e(B")
    ("\\(\e$(5!D!h!5\e(B\\)" . "\e$(5$l\e(B")
    ("\\(\e$(5!D!h!6\e(B\\)" . "\e$(5$m\e(B")
    ("\\(\e$(5!D!h!D\e(B\\)" . "\e$(5$n\e(B")
    ("\\(\e$(5!D!h!E\e(B\\)" . "\e$(5$o\e(B")
    ("\\(\e$(5!D!h!F\e(B\\)" . "\e$(5$p\e(B")
    ("\\(\e$(5!D!h!J\e(B\\)" . "\e$(5$q\e(B")
    ("\\(\e$(5!D!h!K\e(B\\)" . "\e$(5$r\e(B")
    ("\\(\e$(5!D!h!L\e(B\\)" . "\e$(5$s\e(B")
    ("\\(\e$(5!D!h!M\e(B\\)" . "\e$(5$t\e(B")
    ("\\(\e$(5!D!h!T\e(B\\)" . "\e$(5$u\e(B")
    ("\\(\e$(5!E!h!F\e(B\\)" . "\e$(5$v\e(B")
    ("\\(\e$(5!F!h!F\e(B\\)" . "\e$(5$w\e(B")
    ("\\(\e$(5!H!h!B\e(B\\)" . "\e$(5$x\e(B")
    ("\\(\e$(5!H!h!F\e(B\\)" . "\e$(5$y\e(B")
    ("\\(\e$(5!H!h!Q\e(B\\)" . "\e$(5$z\e(B")
    ("\\(\e$(5!J!h!F\e(B\\)" . "\e$(5${\e(B")
    ("\\(\e$(5!J!h!J\e(B\\)" . "\e$(5$|\e(B")
    ("\\(\e$(5!J!h!T\e(B\\)" . "\e$(5$}\e(B")
    ("\\(\e$(5!K!h!F\e(B\\)" . "\e$(5$~\e(B")
    ("\\(\e$(5!L!h!F\e(B\\)" . "\e$(5#P\e(B")
    ("\\(\e$(5!L!h!Q\e(B\\)" . "\e$(5#Q\e(B")
    ("\\(\e$(5!Q!h!Q\e(B\\)" . "\e$(5#`\e(B")
    ("\\(\e$(5!T!h!F\e(B\\)" . "\e$(5#a\e(B")
    ("\\(\e$(5!T!h!T\e(B\\)" . "\e$(5#b\e(B")
    ("\\(\e$(5!U!h!8\e(B\\)" . "\e$(5#c\e(B")
    ("\\(\e$(5!U!h!F\e(B\\)" . "\e$(5#d\e(B")
    ("\\(\e$(5!U!h!J\e(B\\)" . "\e$(5#e\e(B")
    ("\\(\e$(5!U!h!Q\e(B\\)" . "\e$(5#f\e(B")
    ("\\(\e$(5!U!h!T\e(B\\)" . "\e$(5#g\e(B")
    ("\\(\e$(5!V!h!=!h!O!h!M\e(B\\)" . "\e$(5#h\e(B")
    ("\\(\e$(5!V!h!=!h!M\e(B\\)" . "\e$(5#i\e(B")
    ("\\(\e$(5!V!h!=!h!T\e(B\\)" . "\e$(5#j\e(B")
    ("\\(\e$(5!V!h!=\e(B\\)" . "\e$(5#k\e(B")
    ("\\(\e$(5!V!h!>\e(B\\)" . "\e$(5#l\e(B")
    ("\\(\e$(5!W!h!F\e(B\\)" . "\e$(5#m\e(B")
    ;; NOTE(review): the next pattern is identical to one in the "r"
    ;; section above, so the earlier rule is always the one applied.
    ("\\(\e$(5!W!h!O\e(B\\)" . "\e$(5#n\e(B")
    ("\\(\e$(5!X!h!A\e(B\\)" . "\e$(5#p\e(B")
    ("\\(\e$(5!X!h!F\e(B\\)" . "\e$(5#q\e(B")
    ("\\(\e$(5!X!h!L\e(B\\)" . "\e$(5#r\e(B")
    ("\\(\e$(5!X!h!M\e(B\\)" . "\e$(5#s\e(B")
    ("\\(\e$(5!X!h!O\e(B\\)" . "\e$(5#t\e(B")
    ("\\(\e$(5!X!h!Q\e(B\\)" . "\e$(5#u\e(B")
    ("\\(\e$(5!X!h!T\e(B\\)" . "\e$(5#v\e(B")
    ;; Special Ligature Rules
    ("\\(\e$(5!X!_\e(B\\)" . "\e$(5#R\e(B")

    ;; Half form with ligature. Special "r" case is included. "r"
    ;; connection which is not listed here has not been examined yet.
    ;; I don't know what to do with them.
    ;;
    ;; special forms
    ("\\(\e$(5!3!h!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"l\e(B")
    ("\\(\e$(5!:!h!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"m\e(B")
    ;; ordinary forms
    ("\\(\e$(5!5!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"`\e(B")
    ("\\(\e$(5!6!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"a\e(B")
    ;; ("\\(\e$(5!<!h!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"c\e(B") ; Mistake, must check later.
    ("\\(\e$(5!B!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"c\e(B")
    ("\\(\e$(5!B!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"d\e(B")
    ("\\(\e$(5!E!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"e\e(B")
    ("\\(\e$(5!E!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"f\e(B")
    ("\\(\e$(5!H!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"g\e(B")
    ("\\(\e$(5!U!h!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"h\e(B")
    ("\\(\e$(5!U!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"i\e(B")
    ("\\(\e$(5!U!h!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"j\e(B")
    ;; ("\\(\e$(5!U!h!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"k\e(B") ; must check later.
    ;; Conjunction form associated with Nukta sign.
    ("\\(\e$(5!3!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"s\e(B")
    ("\\(\e$(5!4!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"t\e(B")
    ("\\(\e$(5!5!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"u\e(B")
    ("\\(\e$(5!:!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"z\e(B")
    ("\\(\e$(5!I!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"y\e(B")

    ;; For consonants other than listed above, glyph-composition will
    ;; be applied. If the consonant which is preceding "\e$(5!O\e(B" does not
    ;; have the vertical line (such as "\e$(5!?\e(B"), "\e$(5"r\e(B" is put beneath the
    ;; consonant.
    ;;
    ;; ("cons-not-yet-listed-up\\(\e$(5!h!O\e(B\\)" . "\e$(5"q\e(B")
    ("[\e$(5!7!9!=!>!?!@!D!O!P!R!S!X\e(B]\\(\e$(5!h!O\e(B\\)" . "\e$(5"r\e(B")
    ("\e$(5!?!i\e(B\\(\e$(5!h!O\e(B\\)" . "\e$(5"r\e(B")
    ("\e$(5!@!i\e(B\\(\e$(5!h!O\e(B\\)" . "\e$(5"r\e(B")

    ;; Nukta
    ("\\(\e$(5!!!i\e(B\\)" . "\e$(5#!\e(B")
    ("\\(\e$(5!&!i\e(B\\)" . "\e$(5#&\e(B")
    ("\\(\e$(5!'!i\e(B\\)" . "\e$(5#'\e(B")
    ("\\(\e$(5!*!i\e(B\\)" . "\e$(5#*\e(B")
    ("\\(\e$(5!3!i\e(B\\)" . "\e$(5#3\e(B")
    ("\\(\e$(5!4!i\e(B\\)" . "\e$(5#4\e(B")
    ("\\(\e$(5!5!i\e(B\\)" . "\e$(5#5\e(B")
    ("\\(\e$(5!:!i\e(B\\)" . "\e$(5#:\e(B")
    ("\\(\e$(5!?!i\e(B\\)" . "\e$(5#?\e(B")
    ("\\(\e$(5!@!i\e(B\\)" . "\e$(5#@\e(B")
    ("\\(\e$(5!I!i\e(B\\)" . "\e$(5#I\e(B")
    ("\\(\e$(5!j!i\e(B\\)" . "\e$(5#J\e(B")

    ;; Half forms.
    ("\\(\e$(5!3!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"3\e(B")
    ("\\(\e$(5!4!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"4\e(B")
    ("\\(\e$(5!5!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"5\e(B")
    ("\\(\e$(5!6!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"6\e(B")
    ("\\(\e$(5!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"8\e(B")
    ("\\(\e$(5!:!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5":\e(B")
    ("\\(\e$(5!;!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5";\e(B")
    ("\\(\e$(5!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"<\e(B")
    ("\\(\e$(5!A!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"A\e(B")
    ("\\(\e$(5!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"B\e(B")
    ("\\(\e$(5!C!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"C\e(B")
    ("\\(\e$(5!E!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"E\e(B")
    ("\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"F\e(B")
    ("\\(\e$(5!G!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"G\e(B")
    ("\\(\e$(5!H!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"H\e(B")
    ("\\(\e$(5!I!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"I\e(B")
    ("\\(\e$(5!J!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"J\e(B")
    ("\\(\e$(5!K!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"K\e(B")
    ("\\(\e$(5!L!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"L\e(B")
    ("\\(\e$(5!M!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"M\e(B")
    ("\\(\e$(5!N!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"N\e(B")
    ("\\(\e$(5!Q!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"Q\e(B")
    ("\\(\e$(5!R!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"R\e(B")
    ("\\(\e$(5!S!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"S\e(B")
    ("\\(\e$(5!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"T\e(B")
    ("\\(\e$(5!U!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"U\e(B")
    ("\\(\e$(5!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"V\e(B")
    ("\\(\e$(5!W!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"W\e(B")
    )
  "Alist of regexps of Devanagari character sequences vs composed characters.")
452
453 ;; Example:
454 ;;("\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"F\e(B")
455 ;;(string-match "\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5!X![!F!h!D!\\e(B") => 8
456 ;;(match-end 1) => 16
457
458 ;;
459 ;; Defining character properties : char-to-glyph, glyph-to-char
460 ;;
461 ;; * If char-to-glyph is non-nil, it would be one of the following forms.
462 ;;
463 ;; (("character-regexp" . "glyphs")
464 ;; .....) or
465 ;; (("character-regexp" . ?glyph)
466 ;; .....) or
467 ;; ("characters-regexp" . "glyphs")
468 ;; or
469 ;; ?glyph
470 ;;
471 ;; * If glyph-to-char is non-nil, it would be one of the following forms.
472 ;;
473 ;; (("glyph-regexp" . "characters") ;; This is the only case in Devanagari
474 ;; ....) or
475 ;; (("glyph-regexp" . ?character)
476 ;; ....) or
477 ;; ("glyph-regexp" . "characters")
478 ;; or
479 ;; "characters"
480 ;; or
481 ;; ?character
482 ;;
483
;; Register every rule of `devanagari-char-to-glyph-rules' as character
;; properties:
;;  - the rule is appended to the `char-to-glyph' property of the first
;;    character inside the rule's \\( \\) group, so that rules keep the
;;    order in which they are listed;
;;  - the characters inside the group are stored as the `glyph-to-char'
;;    property of the rule's glyph.
(let ((rules devanagari-char-to-glyph-rules))
  (while rules
    (let* ((rule (car rules))
           (regexp (car rule)))
      (setq rules (cdr rules))
      ;; Extract the text between \\( and \\) of the rule's regexp.  A
      ;; rule without such a group is skipped; without this check the
      ;; code below would read match data left over from an earlier rule.
      (if (string-match "\\\\(\\(.+\\)\\\\)" regexp)
          (let* ((chars (substring regexp (match-beginning 1) (match-end 1)))
                 (char (string-to-char chars))
                 ;; Assume one glyph per rule in Devanagari.
                 (glyph (string-to-char (cdr rule))))
            (put-char-code-property
             char 'char-to-glyph
             (append (get-char-code-property char 'char-to-glyph)
                     (list rule)))
            (put-char-code-property glyph 'glyph-to-char chars))))))
497
498 ;;
499 ;; Convert Character Code to Glyph Code
500 ;;
501
502 ;;;###autoload
(defun devanagari-rule-match-at (regexp string pos)
  "Return non-nil if REGEXP matches STRING with its first group starting at POS.
When the value is non-nil, the match data describes that match.
Successive matches of REGEXP are examined from the beginning of STRING,
because a rule may need context in front of its group."
  (let ((start 0)
        (found nil))
    (while (and start
                (<= start (length string))
                (string-match regexp string start))
      (cond ((= (match-beginning 1) pos)
             ;; Leave the loop without searching again, so that the
             ;; match data of this match is kept for the caller.
             (setq found t
                   start nil))
            ((> (match-beginning 0) pos)
             ;; Every later match starts even further right; give up.
             (setq start nil))
            (t
             ;; Retry one character to the right of this match, so that
             ;; overlapping matches are seen too.
             (setq start (+ (match-beginning 0)
                            (char-bytes
                             (string-to-char
                              (substring string (match-beginning 0)))))))))
    found))

;;;###autoload
(defun char-to-glyph-devanagari (src-str)
  "Convert Devanagari characters in the string SRC-STR to Devanagari glyphs.
Ligatures and special rules are processed.

At each position, the rules stored in the `char-to-glyph' property of
the character at that position are tried in order.  The first rule
whose group matches at that position supplies the glyph, and the
characters of the group are skipped.  A character for which no rule
applies is copied as it is.  The value is the resulting string."
  (let ((pos 0)
        (len (length src-str))
        (pieces nil))                   ; Output pieces, in reverse order.
    (while (< pos len)
      (let* ((char (string-to-char (substring src-str pos)))
             ;; caution. other forms of the property are not supported
             ;; for now.
             (rules (get-char-code-property char 'char-to-glyph))
             (found nil))
        (while (and rules (not found))
          (let ((rule (car rules)))
            ;; A rule must be looked for at POS itself.  Testing only the
            ;; first match in SRC-STR would miss the rule whenever the
            ;; same sequence also occurs earlier in the string.
            (if (devanagari-rule-match-at (car rule) src-str pos)
                (setq pieces (cons (cdr rule) pieces)
                      ;; Proceed `pos' past the replaced characters.
                      pos (match-end 1)
                      found t)
              (setq rules (cdr rules)))))
        ;; No rule applies: copy the character and proceed to the next
        ;; position.
        (if (not found)
            (setq pieces (cons (char-to-string char) pieces)
                  pos (+ pos (char-bytes char))))))
    (apply 'concat (nreverse pieces))))
534
535 ;; Example:
536 ;;(char-to-glyph-devanagari "\e$(5!X![!F!h!D!\\e(B") => "\e$(5!X!["F!D!\\e(B"
537 ;;(char-to-glyph-devanagari "\e$(5!O!Z!V!h!=!h!O![!M\e(B") => ???
538
539 ;;
540 ;; Phase 2: Compose Glyphs to form One Glyph.
541 ;;
542
543 ;; Each list consist of glyph, application-priority and application-direction.
544 ;;
545 ;; Glyphs will be ordered from low priority number to high priority number.
546 ;; If application-priority is omitted, it is assumed to be 0.
;; If application-direction is omitted, it is assumed to be '(mr . ml).
548
(defconst devanagari-composition-rules
  '((?\e$(5!!\e(B 60 (tr . br))
    (?\e$(5!"\e(B 60 (tr . br))
    (?\e$(5!#\e(B 60)
    (?\e$(5!$\e(B 0)
    (?\e$(5!%\e(B 0)
    (?\e$(5!&\e(B 0)
    (?\e$(5!'\e(B 0)
    (?\e$(5!(\e(B 0)
    (?\e$(5!)\e(B 0)
    (?\e$(5!*\e(B 0)
    (?\e$(5!+\e(B 0)
    (?\e$(5!,\e(B 0)
    (?\e$(5!-\e(B 0)
    (?\e$(5!.\e(B 0)
    (?\e$(5!/\e(B 0)
    (?\e$(5!0\e(B 0)
    (?\e$(5!1\e(B 0)
    (?\e$(5!2\e(B 0)
    (?\e$(5!3\e(B 0)
    (?\e$(5!4\e(B 0)
    (?\e$(5!5\e(B 0)
    (?\e$(5!6\e(B 0)
    (?\e$(5!7\e(B 0)
    (?\e$(5!8\e(B 0)
    (?\e$(5!9\e(B 0)
    (?\e$(5!:\e(B 0)
    (?\e$(5!;\e(B 0)
    (?\e$(5!<\e(B 0)
    (?\e$(5!=\e(B 0)
    (?\e$(5!>\e(B 0)
    (?\e$(5!?\e(B 0)
    (?\e$(5!@\e(B 0)
    (?\e$(5!A\e(B 0)
    (?\e$(5!B\e(B 0)
    (?\e$(5!C\e(B 0)
    (?\e$(5!D\e(B 0)
    (?\e$(5!E\e(B 0)
    (?\e$(5!F\e(B 0)
    (?\e$(5!G\e(B 0)
    (?\e$(5!H\e(B 0)
    (?\e$(5!I\e(B 0)
    (?\e$(5!J\e(B 0)
    (?\e$(5!K\e(B 0)
    (?\e$(5!L\e(B 0)
    (?\e$(5!M\e(B 0)
    (?\e$(5!N\e(B 0)
    (?\e$(5!O\e(B 0)
    (?\e$(5!P\e(B 0)
    (?\e$(5!Q\e(B 0)
    (?\e$(5!R\e(B 0)
    (?\e$(5!S\e(B 0)
    (?\e$(5!T\e(B 0)
    (?\e$(5!U\e(B 0)
    (?\e$(5!V\e(B 0)
    (?\e$(5!W\e(B 0)
    (?\e$(5!X\e(B 0)
    (?\e$(5!Y\e(B 0)
    (?\e$(5!Z\e(B 40)
    (?\e$(5![\e(B 40 (ml . mr))
    (?\e$(5!\\e(B 40)
    (?\e$(5!]\e(B 40 (bc . tc))
    (?\e$(5!^\e(B 40 (bc . tc))
    (?\e$(5!_\e(B 40 (bc . tc))
    (?\e$(5!`\e(B 40 (tc . bc))
    (?\e$(5!a\e(B 40 (tc . bc))
    (?\e$(5!b\e(B 40 (tc . bc))
    (?\e$(5!c\e(B 40 (tc . bc))
    (?\e$(5!d\e(B 40)
    (?\e$(5!e\e(B 40)
    (?\e$(5!f\e(B 40)
    (?\e$(5!g\e(B 40)
    (?\e$(5!h\e(B 0 (br . tr)) ; Halant's special treatment.
    (?\e$(5!i\e(B 0 (br . tr)) ; Nukta's special treatment.
    (?\e$(5!j\e(B 0)
    ;; NOTE(review): the six nil entries below presumably stand for the
    ;; six code points between the entries before and after them which
    ;; have no glyph -- confirm.
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (?\e$(5!q\e(B 0)
    (?\e$(5!r\e(B 0)
    (?\e$(5!s\e(B 0)
    (?\e$(5!t\e(B 0)
    (?\e$(5!u\e(B 0)
    (?\e$(5!v\e(B 0)
    (?\e$(5!w\e(B 0)
    (?\e$(5!x\e(B 0)
    (?\e$(5!y\e(B 0)
    (?\e$(5!z\e(B 0)
    ;; NOTE(review): presumably placeholders for the four remaining code
    ;; points of this row -- confirm.
    (nil 0)
    (nil 0)
    (nil 0)
    (nil 0)
    (?\e$(5"!\e(B 0)
    (?\e$(5""\e(B 0)
    (?\e$(5"#\e(B 0)
    (?\e$(5"$\e(B 0)
    (?\e$(5"%\e(B 0)
    (?\e$(5"&\e(B 0)
    (?\e$(5"'\e(B 0)
    (?\e$(5"(\e(B 0)
    (?\e$(5")\e(B 0)
    (?\e$(5"*\e(B 0)
    (?\e$(5"+\e(B 0)
    (?\e$(5",\e(B 0)
    (?\e$(5"-\e(B 0)
    (?\e$(5".\e(B 0)
    (?\e$(5"/\e(B 0)
    (?\e$(5"0\e(B 0)
    (?\e$(5"1\e(B 0)
    (?\e$(5"2\e(B 0)
    (?\e$(5"3\e(B 0)
    (?\e$(5"4\e(B 0)
    (?\e$(5"5\e(B 0)
    (?\e$(5"6\e(B 0)
    (?\e$(5"7\e(B 0)
    (?\e$(5"8\e(B 0)
    (?\e$(5"9\e(B 0)
    (?\e$(5":\e(B 0)
    (?\e$(5";\e(B 0)
    (?\e$(5"<\e(B 0)
    (?\e$(5"=\e(B 0)
    (?\e$(5">\e(B 0)
    (?\e$(5"?\e(B 0)
    (?\e$(5"@\e(B 0)
    (?\e$(5"A\e(B 0)
    (?\e$(5"B\e(B 0)
    (?\e$(5"C\e(B 0)
    (?\e$(5"D\e(B 0)
    (?\e$(5"E\e(B 0)
    (?\e$(5"F\e(B 0)
    (?\e$(5"G\e(B 0)
    (?\e$(5"H\e(B 0)
    (?\e$(5"I\e(B 0)
    (?\e$(5"J\e(B 0)
    (?\e$(5"K\e(B 0)
    (?\e$(5"L\e(B 0)
    (?\e$(5"M\e(B 0)
    (?\e$(5"N\e(B 0)
    (?\e$(5"O\e(B 0)
    (?\e$(5"P\e(B 0)
    (?\e$(5"Q\e(B 0)
    (?\e$(5"R\e(B 0)
    (?\e$(5"S\e(B 0)
    (?\e$(5"T\e(B 0)
    (?\e$(5"U\e(B 0)
    (?\e$(5"V\e(B 0)
    (?\e$(5"W\e(B 0)
    (?\e$(5"X\e(B 0)
    (?\e$(5"Y\e(B 0)
    (?\e$(5"Z\e(B 0)
    (?\e$(5"[\e(B 0)
    (?\e$(5"\\e(B 0)
    (?\e$(5"]\e(B 0)
    (?\e$(5"^\e(B 0)
    (?\e$(5"_\e(B 0)
    (?\e$(5"`\e(B 0)
    (?\e$(5"a\e(B 0)
    (?\e$(5"b\e(B 0)
    (?\e$(5"c\e(B 0)
    (?\e$(5"d\e(B 0)
    (?\e$(5"e\e(B 0)
    (?\e$(5"f\e(B 0)
    (?\e$(5"g\e(B 0)
    (?\e$(5"h\e(B 0)
    (?\e$(5"i\e(B 0)
    (?\e$(5"j\e(B 0)
    (?\e$(5"k\e(B 0)
    (?\e$(5"l\e(B 0)
    (?\e$(5"m\e(B 0)
    (?\e$(5"n\e(B 0)
    (?\e$(5"o\e(B 0)
    (?\e$(5"p\e(B 20 (tr . br))
    (?\e$(5"q\e(B 20 (br . tr))
    (?\e$(5"r\e(B 20 (br . tr))
    (?\e$(5"s\e(B 0)
    (?\e$(5"t\e(B 0)
    (?\e$(5"u\e(B 0)
    (?\e$(5"v\e(B 0)
    (?\e$(5"w\e(B 0)
    (?\e$(5"x\e(B 0)
    (?\e$(5"y\e(B 0)
    (?\e$(5"z\e(B 0)
    (?\e$(5"{\e(B 0)
    (?\e$(5"|\e(B 0)
    (?\e$(5"}\e(B 0)
    (?\e$(5"~\e(B 0)
    (?\e$(5#!\e(B 0)
    (?\e$(5#"\e(B 0)
    (?\e$(5##\e(B 0)
    (?\e$(5#$\e(B 0)
    (?\e$(5#%\e(B 0)
    (?\e$(5#&\e(B 0)
    (?\e$(5#'\e(B 0)
    (?\e$(5#(\e(B 0)
    (?\e$(5#)\e(B 0)
    (?\e$(5#*\e(B 0)
    (?\e$(5#+\e(B 0)
    (?\e$(5#,\e(B 0)
    (?\e$(5#-\e(B 0)
    (?\e$(5#.\e(B 0)
    (?\e$(5#/\e(B 0)
    (?\e$(5#0\e(B 0)
    (?\e$(5#1\e(B 0)
    (?\e$(5#2\e(B 0)
    (?\e$(5#3\e(B 0)
    (?\e$(5#4\e(B 0)
    (?\e$(5#5\e(B 0)
    (?\e$(5#6\e(B 0)
    (?\e$(5#7\e(B 0)
    (?\e$(5#8\e(B 0)
    (?\e$(5#9\e(B 0)
    (?\e$(5#:\e(B 0)
    (?\e$(5#;\e(B 0)
    (?\e$(5#<\e(B 0)
    (?\e$(5#=\e(B 0)
    (?\e$(5#>\e(B 0)
    (?\e$(5#?\e(B 0)
    (?\e$(5#@\e(B 0)
    (?\e$(5#A\e(B 0)
    (?\e$(5#B\e(B 0)
    (?\e$(5#C\e(B 0)
    (?\e$(5#D\e(B 0)
    (?\e$(5#E\e(B 0)
    (?\e$(5#F\e(B 0)
    (?\e$(5#G\e(B 0)
    (?\e$(5#H\e(B 0)
    (?\e$(5#I\e(B 0)
    (?\e$(5#J\e(B 0)
    (?\e$(5#K\e(B 0)
    (?\e$(5#L\e(B 0)
    (?\e$(5#M\e(B 0)
    (?\e$(5#N\e(B 0)
    (?\e$(5#O\e(B 0)
    (?\e$(5#P\e(B 0)
    (?\e$(5#Q\e(B 0)
    (?\e$(5#R\e(B 0)
    (?\e$(5#S\e(B 0)
    (?\e$(5#T\e(B 0)
    (?\e$(5#U\e(B 0)
    (?\e$(5#V\e(B 0)
    (?\e$(5#W\e(B 0)
    (?\e$(5#X\e(B 0)
    (?\e$(5#Y\e(B 0)
    (?\e$(5#Z\e(B 0)
    (?\e$(5#[\e(B 0)
    (?\e$(5#\\e(B 0)
    (?\e$(5#]\e(B 0)
    (?\e$(5#^\e(B 0)
    (?\e$(5#_\e(B 0)
    (?\e$(5#`\e(B 0)
    (?\e$(5#a\e(B 0)
    (?\e$(5#b\e(B 0)
    (?\e$(5#c\e(B 0)
    (?\e$(5#d\e(B 0)
    (?\e$(5#e\e(B 0)
    (?\e$(5#f\e(B 0)
    (?\e$(5#g\e(B 0)
    (?\e$(5#h\e(B 0)
    (?\e$(5#i\e(B 0)
    (?\e$(5#j\e(B 0)
    (?\e$(5#k\e(B 0)
    (?\e$(5#l\e(B 0)
    (?\e$(5#m\e(B 0)
    (?\e$(5#n\e(B 0)
    (?\e$(5#o\e(B 0)
    (?\e$(5#p\e(B 0)
    (?\e$(5#q\e(B 0)
    (?\e$(5#r\e(B 0)
    (?\e$(5#s\e(B 0)
    (?\e$(5#t\e(B 0)
    (?\e$(5#u\e(B 0)
    (?\e$(5#v\e(B 0)
    (?\e$(5#w\e(B 0)
    (?\e$(5#x\e(B 0)
    (?\e$(5#y\e(B 0)
    (?\e$(5#z\e(B 0)
    (?\e$(5#{\e(B 0)
    (?\e$(5#|\e(B 0)
    (?\e$(5#}\e(B 0)
    (?\e$(5#~\e(B 0)
    (?\e$(5$!\e(B 0)
    (?\e$(5$"\e(B 0)
    (?\e$(5$#\e(B 0)
    (?\e$(5$$\e(B 0)
    (?\e$(5$%\e(B 0)
    (?\e$(5$&\e(B 0)
    (?\e$(5$'\e(B 0)
    (?\e$(5$(\e(B 0)
    (?\e$(5$)\e(B 0)
    (?\e$(5$*\e(B 0)
    (?\e$(5$+\e(B 0)
    (?\e$(5$,\e(B 0)
    (?\e$(5$-\e(B 0)
    (?\e$(5$.\e(B 0)
    (?\e$(5$/\e(B 0)
    (?\e$(5$0\e(B 0)
    (?\e$(5$1\e(B 0)
    (?\e$(5$2\e(B 0)
    (?\e$(5$3\e(B 0)
    (?\e$(5$4\e(B 0)
    (?\e$(5$5\e(B 0)
    (?\e$(5$6\e(B 0)
    (?\e$(5$7\e(B 0)
    (?\e$(5$8\e(B 0)
    (?\e$(5$9\e(B 0)
    (?\e$(5$:\e(B 0)
    (?\e$(5$;\e(B 0)
    (?\e$(5$<\e(B 0)
    (?\e$(5$=\e(B 0)
    (?\e$(5$>\e(B 0)
    (?\e$(5$?\e(B 0)
    (?\e$(5$@\e(B 0)
    (?\e$(5$A\e(B 0)
    (?\e$(5$B\e(B 0)
    (?\e$(5$C\e(B 0)
    (?\e$(5$D\e(B 0)
    (?\e$(5$E\e(B 0)
    (?\e$(5$F\e(B 0)
    (?\e$(5$G\e(B 0)
    (?\e$(5$H\e(B 0)
    (?\e$(5$I\e(B 0)
    (?\e$(5$J\e(B 0)
    (?\e$(5$K\e(B 0)
    (?\e$(5$L\e(B 0)
    (?\e$(5$M\e(B 0)
    (?\e$(5$N\e(B 0)
    (?\e$(5$O\e(B 0)
    (?\e$(5$P\e(B 0)
    (?\e$(5$Q\e(B 0)
    (?\e$(5$R\e(B 0)
    (?\e$(5$S\e(B 0)
    (?\e$(5$T\e(B 0)
    (?\e$(5$U\e(B 0)
    (?\e$(5$V\e(B 0)
    (?\e$(5$W\e(B 0)
    (?\e$(5$X\e(B 0)
    (?\e$(5$Y\e(B 0)
    (?\e$(5$Z\e(B 0)
    (?\e$(5$[\e(B 0)
    (?\e$(5$\\e(B 0)
    (?\e$(5$]\e(B 0)
    (?\e$(5$^\e(B 0)
    (?\e$(5$_\e(B 0)
    (?\e$(5$`\e(B 0)
    (?\e$(5$a\e(B 0)
    (?\e$(5$b\e(B 0)
    (?\e$(5$c\e(B 0)
    (?\e$(5$d\e(B 0)
    (?\e$(5$e\e(B 0)
    (?\e$(5$f\e(B 0)
    (?\e$(5$g\e(B 0)
    (?\e$(5$h\e(B 0)
    (?\e$(5$i\e(B 0)
    (?\e$(5$j\e(B 0)
    (?\e$(5$k\e(B 0)
    (?\e$(5$l\e(B 0)
    (?\e$(5$m\e(B 0)
    (?\e$(5$n\e(B 0)
    (?\e$(5$o\e(B 0)
    (?\e$(5$p\e(B 0)
    (?\e$(5$q\e(B 0)
    (?\e$(5$r\e(B 0)
    (?\e$(5$s\e(B 0)
    (?\e$(5$t\e(B 0)
    (?\e$(5$u\e(B 0)
    (?\e$(5$v\e(B 0)
    (?\e$(5$w\e(B 0)
    (?\e$(5$x\e(B 0)
    (?\e$(5$y\e(B 0)
    (?\e$(5$z\e(B 0)
    (?\e$(5${\e(B 0)
    (?\e$(5$|\e(B 0)
    (?\e$(5$}\e(B 0)
    (?\e$(5$~\e(B 0)
    )
  "Alist of composition rules for Devanagari glyphs.
Each element has the form (GLYPH PRIORITY [DIRECTION]).
`devanagari-reorder-glyph-for-composition' sorts the glyphs of a
syllable by ascending PRIORITY.  DIRECTION, if present, is the
composition rule which `devanagari-compose-to-one-glyph' puts in front
of GLYPH; if it is omitted, (mr . ml) is used.")
927
928 ;; Determine composition priority and rule of the array of Glyphs.
929 ;; Sort the glyphs with their priority.
930
931 ;; Example:
932 ;;(devanagari-reorder-glyph-for-composition '[?\e$(5"5\e(B ?\e$(5!X\e(B ?\e$(5![\e(B])
;; => ((446680 0) (446773 0) (446683 40 (ml . mr)))
934
(defun devanagari-reorder-glyph-for-composition (glyph-alist)
  "Return the composition rules for the glyphs in GLYPH-ALIST, sorted by priority.
GLYPH-ALIST is a vector of glyph codes.  The value is a list with one
element of the form (GLYPH PRIORITY [DIRECTION]) per glyph, taken from
`devanagari-composition-rules' and ordered from low priority number to
high priority number.  A glyph which is not found in that table is
given the entry (GLYPH 0)."
  (let ((pos 0)
        (len (length glyph-alist))
        (entries nil))                  ; Collected in reverse order.
    (while (< pos len)
      (let ((glyph (aref glyph-alist pos)))
        (setq entries
              ;; Without the fallback entry, a glyph missing from the
              ;; table would yield nil, and the comparison below would
              ;; signal an error on it.
              (cons (or (assq glyph devanagari-composition-rules)
                        (list glyph 0))
                    entries)
              pos (1+ pos))))
    (sort (nreverse entries)
          (function (lambda (x y) (< (nth 1 x) (nth 1 y)))))))
944
945 ;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e2\e$(6!XP"5@![\e1\e(B"
946
(defun devanagari-compose-to-one-glyph (devanagari-string)
  "Compose the glyphs in DEVANAGARI-STRING and return the result as a string.
The glyphs are ordered by `devanagari-reorder-glyph-for-composition'.
The first glyph of that order is the base, and every other glyph is
preceded by its composition rule, or by (mr . ml) if it has none.
The glyphs are then passed through `devanagari-wide-to-narrow' and
handed to `compose-chars'.  If only one glyph remains, it is returned
as a one-character string without composition.  An empty
DEVANAGARI-STRING yields an empty string."
  (if (equal devanagari-string "")
      ;; Nothing to compose.  Without this check the code below would
      ;; end up calling `char-to-string' on nil.
      ""
    (let* ((ordered (devanagari-reorder-glyph-for-composition
                     (string-to-vector devanagari-string)))
           ;; List of glyphs to be composed, starting with the base glyph.
           (cmp-glyph-list (list (car (car ordered))))
           (rest (cdr ordered)))
      (while rest
        (let ((o-glyph (car rest)))
          (setq cmp-glyph-list
                (append cmp-glyph-list
                        (list (if (< 2 (length o-glyph))
                                  ;; composition with a specified rule
                                  (nth 2 o-glyph)
                                ;; default composition
                                '(mr . ml))
                              (car o-glyph)))
                rest (cdr rest))))
      ;; Before applying compose-chars, convert glyphs to
      ;; 1-column width if possible.
      (setq cmp-glyph-list (devanagari-wide-to-narrow cmp-glyph-list))
      (if (= (length cmp-glyph-list) 1)
          (char-to-string (car cmp-glyph-list))
        (apply 'compose-chars cmp-glyph-list)))))
967
968
969 ;;
970 ;; Phase 2.5 Convert Appropriate Character to 1-column shape.
971 ;;
;; This is temporary and should be removed when Emacs supports
;; variable-width characters.
974 ;;
975 ;; This will convert the composing glyphs (2 column glyphs)
976 ;; to narrow (1 column) glyphs if they exist.
977 ;;
;; devanagari-wide-to-narrow converts each glyph independently.
;; devanagari-wide-to-narrow-old takes care of apply-glyphs placed at
;; the top or bottom of a 2-column base-glyph.
981 ;;
982 ;; Execution Examples
983 ;;(devanagari-wide-to-narrow '(446680 446773 (ml . mr) 446683))
984 ;;(devanagari-wide-to-narrow '(?\e$(5!6\e(B (ml . ml) 446773 (tc . mr) 446683))
985
(defun devanagari-wide-to-narrow (src-list)
  "Return a copy of SRC-LIST with glyphs replaced by their 1-column forms.
Every numeric element that has a non-nil mapping in
`devanagari-1-column-char' is replaced by that mapping.  All other
elements, including non-numeric ones such as composition rules, are
kept unchanged."
  (mapcar (lambda (item)
            ;; Only numbers are glyph codes; anything else passes through.
            (or (and (numberp item)
                     (cdr (assq item devanagari-1-column-char)))
                item))
          src-list))
994
;; This function is temporarily obsolete, because Emacs now supports
;; attaching a 1-column character at the center of a 2-column character.
;; However, there are still problems when attaching a Halant or Nukta
;; sign to a non-vowel consonant.  This problem cannot be solved until
;; Emacs supports attaching a glyph at a `temporary-preserved metric'.
1000
(defun devanagari-wide-to-narrow-old (src-list)
  "Narrow the glyphs of SRC-LIST, taking the width of the base glyph into account.
SRC-LIST is (BASE-GLYPH RULE GLYPH RULE GLYPH ...).  The base glyph is
replaced by its mapping in `devanagari-1-column-char' when it has one.
The remaining elements are converted by
`devanagari-wide-to-narrow-iter', which is told whether the base
glyph stayed wide (that is, had no mapping).
Signal an error if SRC-LIST is empty."
  (if (null src-list)
      (error "devanagari-wide-to-narrow error")
    (let* ((head (car src-list))
           (narrow-head (cdr (assq head devanagari-1-column-char)))
           ;; t when the base glyph has no 1-column form.
           (base-is-wide (null narrow-head)))
      (cons (or narrow-head head)
            (devanagari-wide-to-narrow-iter (cdr src-list) base-is-wide)))))
1013
1014 ;; Convert apply-glyph-list from 2-column to 1-column.
1015 ;; wide-base-glyph is t when base-glyph is 2-column.
;; When an apply-glyph is put at the top or bottom of a 2-column
;; base-glyph, it must be a 2-column glyph, too.  Otherwise, it is
;; converted to a 1-column glyph if possible.
1019
(defun devanagari-wide-to-narrow-iter (apply-glyph-list wide-base-glyph)
  "Narrow the glyphs in APPLY-GLYPH-LIST where that is allowed.
APPLY-GLYPH-LIST alternates composition rules and glyphs:
\(RULE GLYPH RULE GLYPH ...).  A glyph is replaced by its mapping in
`devanagari-1-column-char' when it has one, except that when
WIDE-BASE-GLYPH is non-nil a glyph whose RULE starts with one of
tl, tc, tr, bl, bc or br is left as it is.
Return a new list of the same (RULE GLYPH ...) shape.  A trailing
element without a partner is dropped."
  (let ((tail apply-glyph-list)
        (result nil)
        rule glyph on-edge narrow)
    ;; Consume one (RULE GLYPH) pair per pass.
    (while (cdr tail)
      (setq rule (car tail)
            glyph (car (cdr tail))
            tail (cdr (cdr tail))
            on-edge (member (car rule) '(tl tc tr bl bc br))
            narrow (cdr (assq glyph devanagari-1-column-char)))
      ;; RESULT is built in reverse: the glyph is pushed above its rule.
      (setq result
            (cons (if (and narrow
                           (not (and wide-base-glyph on-edge)))
                      narrow
                    glyph)
                  (cons rule result))))
    (nreverse result)))
1033
1034 ;;
1035 ;; Summary
1036 ;;
1037
1038 ;;;###autoload
(defun devanagari-compose-string (str)
  "Return a copy of STR in which every composable unit is composed.
STR is scanned from left to right for matches of
`devanagari-composite-glyph-unit-examine'.  Each match is converted
with `char-to-glyph-devanagari' and then replaced by the result of
`devanagari-compose-to-one-glyph'.  Text between matches is copied
unchanged."
  (let ((src str)
        ;; Output pieces in reverse order; joined once at the end.
        (pieces nil)
        match-b match-e)
    (while (string-match devanagari-composite-glyph-unit-examine src)
      ;; Save the match bounds first: the conversion functions below
      ;; may run their own searches.
      (setq match-b (match-beginning 0)
            match-e (match-end 0))
      (setq pieces
            (cons (devanagari-compose-to-one-glyph
                   (char-to-glyph-devanagari
                    (substring src match-b match-e)))
                  (cons (substring src 0 match-b) pieces)))
      ;; Continue with the text after the match.
      (setq src (substring src match-e)))
    (apply 'concat (nreverse (cons src pieces)))))
1053
1054 ;;;###autoload
(defun devanagari-compose-region (from to)
  "Compose the Devanagari text between FROM and TO in the current buffer.
Starting at FROM, each stretch of text that matches
`devanagari-composite-glyph-unit-examine' is converted with
`char-to-glyph-devanagari', composed with
`devanagari-compose-to-one-glyph', and replaced in the buffer by the
result.  The search is confined to the region by narrowing; point is
moved."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward devanagari-composite-glyph-unit-examine nil t)
      (let* ((unit-start (match-beginning 0))
             (unit-end (match-end 0))
             (unit (buffer-substring unit-start unit-end))
             (composed (devanagari-compose-to-one-glyph
                        (char-to-glyph-devanagari unit))))
        ;; Swap the matched text for its composed form; point ends up
        ;; after the inserted text, where the next search starts.
        (delete-region unit-start unit-end)
        (insert composed)))))
1067
1068 ;;
1069 ;; Decomposition of composite font.
1070 ;;
1071
(defun devanagari-normalize-narrow-glyph (charlist)
  "Return a copy of CHARLIST with 1-column glyphs mapped back.
Each element that appears as the value (cdr) of an entry in
`devanagari-1-column-char' is replaced by the key (car) of that
entry.  Elements without such an entry are kept unchanged."
  (mapcar (lambda (glyph)
            (or (car (rassoc glyph devanagari-1-column-char))
                glyph))
          charlist))
1077
;; Table consulted by `devanagari-reorder-glyph-for-decomposition' when
;; it sorts a glyph list.  Each element is a list (GLYPH NUMBER); the
;; NUMBER is presumably the glyph's sort priority -- confirm against
;; the intended ordering.  That function substitutes 0 for a glyph
;; that has no entry here.
(defvar devanagari-decomposition-rules
  '(
    (?\e$(5"p\e(B -20)
    )
  )
1083
(defun devanagari-reorder-glyph-for-decomposition (glyphlist)
  "Sort GLYPHLIST by decomposition priority and return the sorted list.
The priority of a glyph is the second element of its entry in
`devanagari-decomposition-rules'; a glyph without an entry has
priority 0.  Glyphs are arranged in ascending priority.
GLYPHLIST is sorted destructively, so callers must use the return
value."
  (sort glyphlist
        (lambda (x y)
          ;; Compare the numeric priorities, not the alist entries
          ;; themselves: `<' accepts only numbers.
          (< (or (car (cdr (assq x devanagari-decomposition-rules))) 0)
             (or (car (cdr (assq y devanagari-decomposition-rules))) 0)))))
1093
(defun devanagari-decompose-char (char)
  "Decompose the Devanagari composite character CHAR into a string.
The components returned by `decompose-composite-char' are passed
through `devanagari-normalize-narrow-glyph' and
`devanagari-reorder-glyph-for-decomposition'.  Each resulting glyph
contributes the value of its `glyph-to-char' char-code property, or
the glyph itself as a one-character string when that property is nil."
  (let* ((parts (decompose-composite-char char))
         ;; A non-list result is wrapped so that it can be processed
         ;; like a list of components.
         (glyphs (devanagari-normalize-narrow-glyph
                  (if (listp parts) parts (list parts)))))
    (mapconcat
     (function
      (lambda (glyph)
        (or (get-char-code-property glyph 'glyph-to-char)
            (char-to-string glyph))))
     (devanagari-reorder-glyph-for-decomposition glyphs)
     "")))
1106
1107 ;;;###autoload
(defun devanagari-decompose-string (str)
  "Decompose the Devanagari glyph string STR.
Each character of STR is converted with `devanagari-decompose-char'
and the results are concatenated, in order, into the returned string."
  (let ((remaining str)
        ;; Decomposed pieces in reverse order; joined once at the end.
        (pieces nil)
        first)
    (while (not (equal remaining ""))
      (setq first (string-to-char remaining))
      ;; Step over the leading character, using the length that
      ;; `char-bytes' reports for it.
      (setq remaining (substring remaining (char-bytes first)))
      (setq pieces (cons (devanagari-decompose-char first) pieces)))
    (apply 'concat (nreverse pieces))))
1119
1120 ;;;###autoload
(defun devanagari-decompose-region (from to)
  "Decompose the Devanagari text between FROM and TO in the current buffer.
Starting at FROM, every character matched by the regexp \".\" is
replaced in the buffer by the string that
`devanagari-decompose-string' returns for it.  The search is confined
to the region by narrowing; point is moved."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward "." nil t)
      (let* ((char-start (match-beginning 0))
             (char-end (match-end 0))
             (replacement (devanagari-decompose-string
                           (buffer-substring char-start char-end))))
        ;; Remove the matched character and put its decomposition in
        ;; its place; the next search starts after the inserted text.
        (delete-region char-start char-end)
        (insert replacement)))))
1131
1132
1133
1134 ;; For pre-write and post-read conversion
1135
1136 ;;;###autoload
(defun devanagari-compose-from-is13194-region (from to)
  "Turn IS 13194 text between FROM and TO into composed Devanagari.
With the buffer narrowed to the region, run
`indian-to-devanagari-region' and then `devanagari-compose-region'
over the whole narrowed buffer."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; The bounds are recomputed for the second step because the first
    ;; step may change the size of the text.
    (indian-to-devanagari-region (point-min) (point-max))
    (devanagari-compose-region (point-min) (point-max))))
1144
1145 ;;;###autoload
(defun devanagari-decompose-to-is13194-region (from to)
  "Turn composed Devanagari text between FROM and TO into IS 13194.
With the buffer narrowed to the region, run
`devanagari-decompose-region' and then `devanagari-to-indian-region'
over the whole narrowed buffer."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; The bounds are recomputed for the second step because the first
    ;; step may change the size of the text.
    (devanagari-decompose-region (point-min) (point-max))
    (devanagari-to-indian-region (point-min) (point-max))))
1153
1154 ;;
1155 (provide 'language/devan-util)
1156
1157 ;;; Local Variables:
1158 ;;; generated-autoload-file: "../loaddefs.el"
1159 ;;; End:
1160 ;;; devan-util.el ends here