1 ;;; devan-util.el --- Support for Devanagari Script Composition
3 ;; Copyright (C) 1996 Free Software Foundation, Inc.
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
7 ;; Keywords: multilingual, Indian, Devanagari
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to
23 ;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
28 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
30 ;; Devanagari script composition rules and related programs.
35 ;;; Steps toward composition of Devanagari Characters.
(defun indian-to-devanagari (ch)
  "Convert the IS 13194 character CH to a Devanagari basic character.
CH is returned unchanged when it does not belong to the charset
`indian-is13194'."
  (let ((charcodes (split-char ch)))
    (if (eq (car charcodes) 'indian-is13194)
        ;; Reuse the IS 13194 position code as the second position code
        ;; of `indian-2-column', under first position code ?\x21.
        (make-char 'indian-2-column ?\x21 (nth 1 charcodes))
      ch)))
(defun devanagari-to-indian (ch)
  "Convert the Devanagari basic character CH to an IS 13194 character.
Only characters of charset `indian-2-column' whose first position
code is ?\\x21 are converted; any other CH is returned unchanged."
  (let* ((charcodes (split-char ch))
         (charset (car charcodes))
         (code-h (car (cdr charcodes))))
    (if (and (eq charset 'indian-2-column)
             (= code-h ?\x21))
        ;; The second position code is the IS 13194 code point.
        (make-char 'indian-is13194 (nth 2 charcodes))
      ch)))
(defun indian-to-devanagari-region (from to)
  "Convert IS 13194 characters in region to Devanagari basic characters.
FROM and TO delimit the region; interactively, the current region is used."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward "\\cd" nil t)
      (let ((devanagari-char (indian-to-devanagari (preceding-char))))
        ;; Replace the character just matched with its converted form.
        (delete-char -1)
        (insert devanagari-char)))))
(defun devanagari-to-indian-region (from to)
  "Convert Devanagari basic characters in region to Indian characters.
FROM and TO delimit the region; interactively, the current region is used."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward "\\cD" nil t) ; Devanagari Character Code.
      (let ((indian-char (devanagari-to-indian (preceding-char))))
        ;; Replace the character just matched with its converted form.
        (delete-char -1)
        (insert indian-char)))))
(defun indian-to-devanagari-string (str)
  "Convert Indian String to Devanagari Basic Character String.
Each character of STR is passed through `indian-to-devanagari' and the
results are concatenated into the returned string."
  (let ((dst "") (src str) char)
    (while (not (equal src ""))
      (setq char (string-to-char src))
      ;; Drop the leading character; its length is given by `char-bytes'.
      (setq src (substring src (char-bytes char)))
      (setq dst (concat dst (char-to-string (indian-to-devanagari char)))))
    dst))
93 ;; Phase 0 - Determine whether the characters can be composed.
96 ;;; Regular expressions to split characters for composition.
99 ;; Indian script word contains one or more syllables.
100 ;; In BNF, it can be expressed as follows:
102 ;; Word ::= {Syllable} [Cons-Syllable]
103 ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
104 ;; Vowel-Syllable ::= V[D]
105 ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
106 ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] Pure-Cons
107 ;; Pure-Cons ::= Full-Cons H
108 ;; Full-Cons ::= C [N]
110 ;; {} repeat, [] optional
112 ;; C - Consonant (\e$(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E\e(B
113 ;; \e$(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X\e(B)
114 ;; N - Nukta (\e$(5!i\e(B)
115 ;; H - Halant(\e$(5!h\e(B)
116 ;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2\e(B)
117 ;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu, Visarg (\e$(5!!!"\e(B)
118 ;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g\e(B)
120 ;; In Emacs, one syllable of Indian language is considered to be one
121 ;; composite glyph. If we expand the above expression, it would be:
123 ;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] [D] | V [D]
125 ;; Therefore, in the worst case, the consonant syllable will consist of
126 ;; the following characters.
128 ;; C N H C N H C N H C N M D
130 ;; On the other hand, incomplete consonant syllable before inputting
131 ;; base consonant must satisfy the following condition:
133 ;; [C [N] H] [C [N] H] C [N] H
135 ;; This is acceptable BEFORE proper consonant-syllable is input. The
136 ;; string which doesn't match with the above expression is invalid and
137 ;; thus must be fixed.
140 ;; Third case can be considered, which is acceptable syllable and can
141 ;; not add any code more.
143 ;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] D
145 ;; However, to make editing possible even in this condition, we will
146 ;; not consider about this case.
(defconst devanagari-cons-syllable-examine
  "\\(\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?[\e$(5!Z\e(B-\e$(5!g\e(B]?[\e$(5!!!"\e(B]?"
  "Regexp matching one Devanagari consonant syllable.")
(defconst devanagari-cons-syllable-incomplete-examine
  "\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B$"
  "Regexp matching one incomplete Devanagari consonant syllable.
The pattern is anchored at the end of the string being matched.")
(defconst devanagari-vowel-syllable-examine
  "[\e$(5!$\e(B-\e$(5!2\e(B][\e$(5!!!"!#\e(B]?"
  "Regexp matching one Devanagari vowel syllable.")
161 ;; Also, digits and virams should be processed other than syllables.
163 ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and
164 ;; OM is obtained by Nukta after Chandrabindu
(defconst devanagari-digit-viram-examine
  "[\e$(5!q\e(B-\e$(5!z!j\e(B]"
  "Regexp matching one Devanagari digit or Viram.
These are processed separately from syllables.")
(defconst devanagari-other-sign-examine
  "[\e$(5!!!j\e(B]\e$(5!i\e(B"
  "Regexp matching a sign that IS 13194 writes as a character plus Nukta.
In IS 13194, Avagrah is obtained by Nukta after Viram, and OM is
obtained by Nukta after Chandrabindu.")
(defconst devanagari-composite-glyph-unit-examine
  ;; One alternative per kind of unit, each in its own group.  The
  ;; incomplete consonant syllable is tried before the complete one.
  (concat "\\(" devanagari-cons-syllable-incomplete-examine
          "\\)\\|\\(" devanagari-vowel-syllable-examine
          "\\)\\|\\(" devanagari-digit-viram-examine
          "\\)\\|\\(" devanagari-cons-syllable-examine
          "\\)\\|\\(" devanagari-other-sign-examine "\\)")
  "Regexp matching a Devanagari string to be composed into one glyph.")
179 ;;(put-charset-property charset-devanagari-1-column
180 ;; 'char-to-glyph 'devanagari-compose-string)
181 ;;(put-charset-property charset-devanagari-2-column
182 ;; 'char-to-glyph 'devanagari-compose-string)
186 ;;(string-match devanagari-cons-syllable-examine "\e$(5!X![\e(B") => 0
187 ;;(string-match devanagari-cons-syllable-examine "\e$(5!F!h!D!\\e(B") => 0
188 ;;(string-match devanagari-cons-syllable-examine "\e$(5!X![!F!h!D!\\e(B") => 0
191 ;; Steps toward the composition
192 ;; Converting Character Code to Composite Glyph.
194 ;; Example : \e$(5!X![\e(B/\e$(5!F!h!D!\\e(B
196 ;; First, convert Characters to appropriate glyphs.
198 ;; => \e$(5!X![\e(B/\e$(5"F!D!\\e(B
200 ;; Then, determine the base glyph, apply-orders and apply-rules.
202 ;; => \e$(5!X\e(B (ml.mr) \e$(5![\e(B / \e$(5!D\e(B (ml.mr) \e$(5"F\e(B (mr ml) \e$(5!\\e(B
204 ;; Finally, convert 2-column glyphs to 1-column glyph
205 ;; if such a glyph exist.
207 ;; => \e$(6![\e(B (ml.mr) \e$(6!X\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B
209 ;; Compose the glyph.
211 ;; => \e2\e$(6!X@![\e1\e(B/\e2\e$(6!D@"FP!\\e1\e(B
212 ;; => \e2\e$(6!X@![\e1\e2!D@"FP!\\e1\e(B
216 ;; Phase 1: Converting Character Code to Glyph Code.
220 ;; There may be many rules which you may want to suppress.
221 ;; In that case, please comment out that rule.
223 ;; RULES WILL BE EVALUATED FROM FIRST TO LAST.
224 ;; PUT MORE SPECIFIC RULES FIRST.
227 ;; Prepare multiple specific list of rules for each languages
228 ;; which adopts Devanagari script.
232 (defconst devanagari-char-to-glyph-rules
234 ;; special form for "ru".
235 ("\\(\e$(5!O!]\e(B\\)" .
"\e$(5",\e(B")
236 ("\\(\e$
(5!O
!^
\e(B\\)" . "\e$
(5"-\e(B")
237 ("\\(\e$(5!P!]\e(B\\)" .
"\e$(5".
\e(B")
238 ("\\(\e$
(5!P
!^
\e(B\\)" . "\e$
(5"/\e(B")
240 ;; `r' at the top of syllable and followed by other consonants.
241 ;; ("[^\e$(5!h\e(B]\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"p\e(B")
242 ("^\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"p
\e(B")
244 ;; If "r
" is preceded by the vowel-suppressed consonant
245 ;; (especially those with vertical line), it will be written as
246 ;; slanted line below the preceding consonant character. Some of
247 ;; them are pre-composed as one glyph.
249 ("\\(\e$
(5!:!i
!h
!O
\e(B\\)" . "\e$
(5"!\e(B")
250 ("\\(\e$(5!I!i!h!O\e(B\\)" .
"\e$(5""\e(B")
251 ("\\(\e$(5!3!h!O\e(B\\)" .
"\e$(5"#\e(B")
252 ("\\(\e$
(5!:!h
!O
\e(B\\)" . "\e$
(5"$\e(B")
253 ("\\(\e$(5!B!h!O\e(B\\)" .
"\e$(5"%
\e(B")
254 ("\\(\e$
(5!H
!h
!O
\e(B\\)" . "\e$
(5"&\e(B")
255 ("\\(\e$(5!I!h!O\e(B\\)" .
"\e$(5"'\e(B")
256 ("\\(\e$
(5!U
!h
!O
\e(B\\)" . "\e$
(5"(\e(B")
257 ("\\(\e$(5!W!h!O\e(B\\)" .
"\e$(5")\e(B")
260 ("\\(\e$
(5!3!h
!B
!h
!O
!h
!M
\e(B\\)" . "\e$
(5$
!\e(B")
261 ("\\(\e$
(5!3!h
!B
!h
!T
\e(B\\)" . "\e$
(5$
"\e(B")
262 ("\\(\e$(5!3!h!B!h!M\e(B\\)" .
"\e$(5$#\e(B")
263 ("\\(\e$(5!3!h!F!h!M\e(B\\)" .
"\e$(5$$\e(B")
264 ("\\(\e$(5!3!h!O!h!M\e(B\\)" .
"\e$(5$%\e(B")
265 ("\\(\e$(5!3!h!T!h!M\e(B\\)" .
"\e$(5$&\e(B")
266 ("\\(\e$(5!3!h!3\e(B\\)" .
"\e$(5$'\e(B")
267 ("\\(\e$(5!3!h!B\e(B\\)" .
"\e$(5$(\e(B")
268 ("\\(\e$(5!3!h!F\e(B\\)" .
"\e$(5$)\e(B")
269 ("\\(\e$(5!3!h!L\e(B\\)" .
"\e$(5$*\e(B")
270 ("\\(\e$(5!3!h!M\e(B\\)" .
"\e$(5$+\e(B")
271 ("\\(\e$(5!3!h!Q\e(B\\)" .
"\e$(5$,\e(B")
272 ("\\(\e$(5!3!h!T\e(B\\)" .
"\e$(5$-\e(B")
273 ("\\(\e$(5!3!h!V\e(B\\)" .
"\e$(5$.\e(B")
274 ("\\(\e$(5!6!h!F\e(B\\)" .
"\e$(5$/\e(B")
275 ("\\(\e$(5!7!h!3!h!B!h!M\e(B\\)" .
"\e$(5$0\e(B")
276 ("\\(\e$(5!7!h!3!h!V!h!T\e(B\\)" .
"\e$(5$1\e(B")
277 ("\\(\e$(5!7!h!3!h!B\e(B\\)" .
"\e$(5$2\e(B")
278 ("\\(\e$(5!7!h!3!h!V\e(B\\)" .
"\e$(5$3\e(B")
279 ("\\(\e$(5!7!h!6!h!O\e(B\\)" .
"\e$(5$4\e(B")
280 ("\\(\e$(5!7!h!3!h!M\e(B\\)" .
"\e$(5$5\e(B")
281 ("\\(\e$(5!7!h!4!h!M\e(B\\)" .
"\e$(5$6\e(B")
282 ("\\(\e$(5!7!h!5!h!M\e(B\\)" .
"\e$(5$7\e(B")
283 ("\\(\e$(5!7!h!6!h!M\e(B\\)" .
"\e$(5$8\e(B")
284 ("\\(\e$(5!7!h!3\e(B\\)" .
"\e$(5$9\e(B")
285 ("\\(\e$(5!7!h!4\e(B\\)" .
"\e$(5$:\e(B")
286 ("\\(\e$(5!7!h!5\e(B\\)" .
"\e$(5$;\e(B")
287 ("\\(\e$(5!7!h!6\e(B\\)" .
"\e$(5$<\e(B")
288 ("\\(\e$(5!7!h!7\e(B\\)" .
"\e$(5$=\e(B")
289 ("\\(\e$(5!7!h!F\e(B\\)" .
"\e$(5$>\e(B")
290 ("\\(\e$(5!7!h!L\e(B\\)" .
"\e$(5$?\e(B")
291 ("\\(\e$(5!7!h!M\e(B\\)" .
"\e$(5$@\e(B")
292 ("\\(\e$(5!8!h!8\e(B\\)" .
"\e$(5$A\e(B")
293 ("\\(\e$(5!8!h!<\e(B\\)" .
"\e$(5$B\e(B")
294 ("\\(\e$(5!9!h!M\e(B\\)" .
"\e$(5$C\e(B")
295 ("\\(\e$(5!:!h!O\e(B\\)" .
"\e$(5$D\e(B")
296 ("\\(\e$(5!:!h!h\e(B\\)" .
"\e$(5$E\e(B")
297 ("\\(\e$(5!<!h!8\e(B\\)" .
"\e$(5$F\e(B")
298 ("\\(\e$(5!<!h!:\e(B\\)" .
"\e$(5$G\e(B")
299 ("\\(\e$(5!=!h!3\e(B\\)" .
"\e$(5$H\e(B")
300 ("\\(\e$(5!=!h!=\e(B\\)" .
"\e$(5$I\e(B")
301 ("\\(\e$(5!=!h!>\e(B\\)" .
"\e$(5$J\e(B")
302 ("\\(\e$(5!=!h!M\e(B\\)" .
"\e$(5$K\e(B")
303 ("\\(\e$(5!>!h!M\e(B\\)" .
"\e$(5$L\e(B")
304 ("\\(\e$(5!?!h!5!h!M\e(B\\)" .
"\e$(5$M\e(B")
305 ("\\(\e$(5!?!h!6!h!O\e(B\\)" .
"\e$(5$N\e(B")
306 ("\\(\e$(5!?!h!O!h!M\e(B\\)" .
"\e$(5$O\e(B")
307 ("\\(\e$(5!?!h!5\e(B\\)" .
"\e$(5$P\e(B")
308 ("\\(\e$(5!?!h!6\e(B\\)" .
"\e$(5$Q\e(B")
309 ("\\(\e$(5!?!h!?\e(B\\)" .
"\e$(5$R\e(B")
310 ("\\(\e$(5!?!h!L\e(B\\)" .
"\e$(5$S\e(B")
311 ("\\(\e$(5!?!h!M\e(B\\)" .
"\e$(5$T\e(B")
312 ("\\(\e$(5!@!h!M\e(B\\)" .
"\e$(5$`\e(B")
313 ("\\(\e$(5!B!h!B\e(B\\)" .
"\e$(5$a\e(B")
314 ("\\(\e$(5!B!h!F\e(B\\)" .
"\e$(5$b\e(B")
315 ("\\(\e$(5!D!h!D!h!M\e(B\\)" .
"\e$(5$c\e(B")
316 ("\\(\e$(5!D!h!E!h!M\e(B\\)" .
"\e$(5$d\e(B")
317 ("\\(\e$(5!D!h!K!h!M\e(B\\)" .
"\e$(5$e\e(B")
318 ("\\(\e$(5!D!h!O!h!M\e(B\\)" .
"\e$(5$f\e(B")
319 ("\\(\e$(5!D!h!T!h!M\e(B\\)" .
"\e$(5$g\e(B")
320 ("\\(\e$(5!D!h!5!h!O\e(B\\)" .
"\e$(5$h\e(B")
321 ("\\(\e$(5!D!h!6!h!O\e(B\\)" .
"\e$(5$i\e(B")
322 ("\\(\e$(5!D!h!D!h!T\e(B\\)" .
"\e$(5$j\e(B")
323 ("\\(\e$(5!D!h!E!h!T\e(B\\)" .
"\e$(5$k\e(B")
324 ("\\(\e$(5!D!h!5\e(B\\)" .
"\e$(5$l\e(B")
325 ("\\(\e$(5!D!h!6\e(B\\)" .
"\e$(5$m\e(B")
326 ("\\(\e$(5!D!h!D\e(B\\)" .
"\e$(5$n\e(B")
327 ("\\(\e$(5!D!h!E\e(B\\)" .
"\e$(5$o\e(B")
328 ("\\(\e$(5!D!h!F\e(B\\)" .
"\e$(5$p\e(B")
329 ("\\(\e$(5!D!h!J\e(B\\)" .
"\e$(5$q\e(B")
330 ("\\(\e$(5!D!h!K\e(B\\)" .
"\e$(5$r\e(B")
331 ("\\(\e$(5!D!h!L\e(B\\)" .
"\e$(5$s\e(B")
332 ("\\(\e$(5!D!h!M\e(B\\)" .
"\e$(5$t\e(B")
333 ("\\(\e$(5!D!h!T\e(B\\)" .
"\e$(5$u\e(B")
334 ("\\(\e$(5!E!h!F\e(B\\)" .
"\e$(5$v\e(B")
335 ("\\(\e$(5!F!h!F\e(B\\)" .
"\e$(5$w\e(B")
336 ("\\(\e$(5!H!h!B\e(B\\)" .
"\e$(5$x\e(B")
337 ("\\(\e$(5!H!h!F\e(B\\)" .
"\e$(5$y\e(B")
338 ("\\(\e$(5!H!h!Q\e(B\\)" .
"\e$(5$z\e(B")
339 ("\\(\e$(5!J!h!F\e(B\\)" .
"\e$(5${\e(B")
340 ("\\(\e$(5!J!h!J\e(B\\)" .
"\e$(5$|\e(B")
341 ("\\(\e$(5!J!h!T\e(B\\)" .
"\e$(5$}\e(B")
342 ("\\(\e$(5!K!h!F\e(B\\)" .
"\e$(5$~\e(B")
343 ("\\(\e$(5!L!h!F\e(B\\)" .
"\e$(5#P\e(B")
344 ("\\(\e$(5!L!h!Q\e(B\\)" .
"\e$(5#Q\e(B")
345 ("\\(\e$(5!Q!h!Q\e(B\\)" .
"\e$(5#`\e(B")
346 ("\\(\e$(5!T!h!F\e(B\\)" .
"\e$(5#a\e(B")
347 ("\\(\e$(5!T!h!T\e(B\\)" .
"\e$(5#b\e(B")
348 ("\\(\e$(5!U!h!8\e(B\\)" .
"\e$(5#c\e(B")
349 ("\\(\e$(5!U!h!F\e(B\\)" .
"\e$(5#d\e(B")
350 ("\\(\e$(5!U!h!J\e(B\\)" .
"\e$(5#e\e(B")
351 ("\\(\e$(5!U!h!Q\e(B\\)" .
"\e$(5#f\e(B")
352 ("\\(\e$(5!U!h!T\e(B\\)" .
"\e$(5#g\e(B")
353 ("\\(\e$(5!V!h!=!h!O!h!M\e(B\\)" .
"\e$(5#h\e(B")
354 ("\\(\e$(5!V!h!=!h!M\e(B\\)" .
"\e$(5#i\e(B")
355 ("\\(\e$(5!V!h!=!h!T\e(B\\)" .
"\e$(5#j\e(B")
356 ("\\(\e$(5!V!h!=\e(B\\)" .
"\e$(5#k\e(B")
357 ("\\(\e$(5!V!h!>\e(B\\)" .
"\e$(5#l\e(B")
358 ("\\(\e$(5!W!h!F\e(B\\)" .
"\e$(5#m\e(B")
359 ("\\(\e$(5!W!h!O\e(B\\)" .
"\e$(5#n\e(B")
360 ("\\(\e$(5!X!h!A\e(B\\)" .
"\e$(5#p\e(B")
361 ("\\(\e$(5!X!h!F\e(B\\)" .
"\e$(5#q\e(B")
362 ("\\(\e$(5!X!h!L\e(B\\)" .
"\e$(5#r\e(B")
363 ("\\(\e$(5!X!h!M\e(B\\)" .
"\e$(5#s\e(B")
364 ("\\(\e$(5!X!h!O\e(B\\)" .
"\e$(5#t\e(B")
365 ("\\(\e$(5!X!h!Q\e(B\\)" .
"\e$(5#u\e(B")
366 ("\\(\e$(5!X!h!T\e(B\\)" .
"\e$(5#v\e(B")
367 ;; Special Ligature Rules
368 ("\\(\e$(5!X!_\e(B\\)" .
"\e$(5#R\e(B")
370 ;; Half form with ligature. Special "r" case is included. "r"
371 ;; connection which is not listed here has not been examined yet.
372 ;; I don't know what to do with them.
375 ("\\(\e$(5!3!h!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"l
\e(B")
376 ("\\(\e$
(5!:!h
!<!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"m\e(B")
378 ("\\(\e$(5!5!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"`\e(B")
379 ("\\(\e$
(5!6!h
!F
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"a\e(B")
380 ;; ("\\(\e$(5!<!h!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"c\e(B") ; Mistake, must check later.
381 ("\\(\e$(5!B!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"c
\e(B")
382 ("\\(\e$
(5!B
!h
!O
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"d\e(B")
383 ("\\(\e$(5!E!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"e
\e(B")
384 ("\\(\e$
(5!E
!h
!O
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"f\e(B")
385 ("\\(\e$(5!H!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"g
\e(B")
386 ("\\(\e$
(5!U
!h
!8!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"h\e(B")
387 ("\\(\e$(5!U!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"i
\e(B")
388 ("\\(\e$
(5!U
!h
!T
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"j\e(B")
389 ;; ("\\(\e$(5!U!h!T!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"k\e(B") ; must check later.
390 ;; Conjunction form associated with Nukta sign.
391 ("\\(\e$(5!3!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"s
\e(B")
392 ("\\(\e$
(5!4!i
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"t\e(B")
393 ("\\(\e$(5!5!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"u
\e(B")
394 ("\\(\e$
(5!:!i
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"z\e(B")
395 ("\\(\e$(5!I!i!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"y
\e(B")
397 ;; For consonants other than listed above, glyph-composition will
398 ;; be applied. If the consonant which is preceding "\e$
(5!O
\e(B" does not
399 ;; have the vertical line (such as "\e$
(5!?
\e(B"), "\e$
(5"r\e(B" is put beneath the
402 ;; ("cons-not-yet-listed-up\\(\e$(5!h!O\e(B\\)" . "\e$(5"q\e(B")
403 ("[\e$(5!7!9!=!>!?!@!D!O!P!R!S!X\e(B]\\(\e$(5!h!O\e(B\\)" .
"\e$(5"r
\e(B")
404 ("\e$
(5!?
!i
\e(B\\(\e$
(5!h
!O
\e(B\\)" . "\e$
(5"r\e(B")
405 ("\e$(5!@!i\e(B\\(\e$(5!h!O\e(B\\)" .
"\e$(5"r
\e(B")
408 ("\\(\e$
(5!!!i
\e(B\\)" . "\e$
(5#!\e(B")
409 ("\\(\e$
(5!&!i
\e(B\\)" . "\e$
(5#&\e(B")
410 ("\\(\e$
(5!'!i
\e(B\\)" . "\e$
(5#'\e(B")
411 ("\\(\e$
(5!*!i
\e(B\\)" . "\e$
(5#*\e(B")
412 ("\\(\e$
(5!3!i
\e(B\\)" . "\e$
(5#3\e(B")
413 ("\\(\e$
(5!4!i
\e(B\\)" . "\e$
(5#4\e(B")
414 ("\\(\e$
(5!5!i
\e(B\\)" . "\e$
(5#5\e(B")
415 ("\\(\e$
(5!:!i
\e(B\\)" . "\e$
(5#:\e(B")
416 ("\\(\e$
(5!?
!i
\e(B\\)" . "\e$
(5#?
\e(B")
417 ("\\(\e$
(5!@!i
\e(B\\)" . "\e$
(5#@\e(B")
418 ("\\(\e$
(5!I
!i
\e(B\\)" . "\e$
(5#I
\e(B")
419 ("\\(\e$
(5!j
!i
\e(B\\)" . "\e$
(5#J
\e(B")
422 ("\\(\e$
(5!3!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"3\e(B")
423 ("\\(\e$(5!4!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"4\e(B")
424 ("\\(\e$
(5!5!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"5\e(B")
425 ("\\(\e$(5!6!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"6\e(B")
426 ("\\(\e$
(5!8!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"8\e(B")
427 ("\\(\e$(5!:!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5":\e(B")
428 ("\\(\e$
(5!;!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5";\e(B")
429 ("\\(\e$(5!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"<\e(B")
430 ("\\(\e$
(5!A
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"A\e(B")
431 ("\\(\e$(5!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"B
\e(B")
432 ("\\(\e$
(5!C
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"C\e(B")
433 ("\\(\e$(5!E!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"E
\e(B")
434 ("\\(\e$
(5!F
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"F\e(B")
435 ("\\(\e$(5!G!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"G
\e(B")
436 ("\\(\e$
(5!H
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"H\e(B")
437 ("\\(\e$(5!I!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"I
\e(B")
438 ("\\(\e$
(5!J
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"J\e(B")
439 ("\\(\e$(5!K!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"K
\e(B")
440 ("\\(\e$
(5!L
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"L\e(B")
441 ("\\(\e$(5!M!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"M
\e(B")
442 ("\\(\e$
(5!N
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"N\e(B")
443 ("\\(\e$(5!Q!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"Q
\e(B")
444 ("\\(\e$
(5!R
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"R\e(B")
445 ("\\(\e$(5!S!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"S
\e(B")
446 ("\\(\e$
(5!T
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"T\e(B")
447 ("\\(\e$(5!U!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"U
\e(B")
448 ("\\(\e$
(5!V
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"V\e(B")
449 ("\\(\e$(5!W!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" .
"\e$(5"W
\e(B")
451 "Alist of regexps of Devanagari character sequences vs composed characters.
")
454 ;;("\\(\e$
(5!F
!h
\e(B\\)[\e$
(5!3\e(B-\e$
(5!X
\e(B]" . "\e$
(5"F\e(B")
455 ;;(string-match "\\(\e$(5!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" "\e$(5!X![!F!h!D!\\e(B") => 8
456 ;;(match-end 1) => 16
459 ;; Defining character properties : char-to-glyph, glyph-to-char
461 ;; * If char-to-glyph is non-nil, it would be one of the following forms.
463 ;; (("character-regexp" . "glyphs")
465 ;; (("character-regexp" . ?glyph)
467 ;; ("characters-regexp" . "glyphs")
471 ;; * If glyph-to-char is non-nil, it would be one of the following forms.
473 ;; (("glyph-regexp" . "characters") ;; This is the only case in Devanagari
475 ;; (("glyph-regexp" . ?character)
477 ;; ("glyph-regexp" . "characters")
;; Register every rule of `devanagari-char-to-glyph-rules' as character
;; properties: the rule is appended to the `char-to-glyph' property of
;; the first character of its source sequence, and the source sequence
;; is stored as the `glyph-to-char' property of the resulting glyph.
(let ((rules devanagari-char-to-glyph-rules))
  (while rules
    (let ((rule (car rules))
          chars char glyph)
      (setq rules (cdr rules))
      ;; The source characters are the text inside the rule's \( ... \) group.
      (string-match "\\\\(\\(.+\\)\\\\)" (car rule))
      (setq chars (substring (car rule) (match-beginning 1) (match-end 1)))
      (setq char (string-to-char chars))
      (setq glyph (string-to-char (cdr rule))) ; assume one glyph in devan.
      (put-char-code-property
       char 'char-to-glyph
       (append (get-char-code-property char 'char-to-glyph) (list rule)))
      (put-char-code-property glyph 'glyph-to-char chars))))
499 ;; Convert Character Code to Glyph Code
503 (defun char-to-glyph-devanagari (src-str)
504 "Convert Devanagari characters in the string to Devanagari glyphs.
505 Ligatures and special rules are processed."
508 (while (< pos
(length src-str
))
510 (rules (get-char-code-property
512 ;; caution. other forms not supported for now.
513 (substring src-str pos
)) 'char-to-glyph
)))
515 (let* ((rule (car rules
))
517 (if (string-match regexp src-str
)
518 (if (= (match-beginning 1) pos
)
520 (setq dst-str
(concat dst-str
(cdr rule
)))
521 (setq rules nil
) ; Get out of the loop.
523 ;; proceed `pos' for replaced characters.
524 (setq pos
(match-end 1)))
525 (setq rules
(cdr rules
)))
526 (setq rules
(cdr rules
)))))
527 ;; proceed to next position
529 (let ((nextchar (string-to-char (substring src-str pos
))))
531 (char-bytes (string-to-char (substring src-str pos
)))))
532 (setq dst-str
(concat dst-str
(char-to-string nextchar
)))))))
536 ;;(char-to-glyph-devanagari "\e$(5!X![!F!h!D!\\e(B") => "\e$(5!X!["F!D!\\e(B"
537 ;;(char-to-glyph-devanagari "\e$(5!O!Z!V!h!=!h!O![!M\e(B") => ???
540 ;; Phase 2: Compose Glyphs to form One Glyph.
543 ;; Each list consist of glyph, application-priority and application-direction.
545 ;; Glyphs will be ordered from low priority number to high priority number.
546 ;; If application-priority is omitted, it is assumed to be 0.
547 ;; If application-direction is omitted, it is assumed to be '(mr . ml).
549 (defconst devanagari-composition-rules
550 '((?
\e$
(5!!\e(B 60 (tr . br
))
551 (?
\e$
(5!"\e(B 60 (tr . br))
608 (?\e$(5![\e(B 40 (ml . mr))
610 (?\e$(5!]\e(B 40 (bc . tc))
611 (?\e$(5!^\e(B 40 (bc . tc))
612 (?\e$(5!_\e(B 40 (bc . tc))
613 (?\e$(5!`\e(B 40 (tc . bc))
614 (?\e$(5!a\e(B 40 (tc . bc))
615 (?\e$(5!b\e(B 40 (tc . bc))
616 (?\e$(5!c\e(B 40 (tc . bc))
621 (?\e$(5!h\e(B 0 (br . tr)) ; Halant's special treatment.
622 (?\e$(5!i\e(B 0 (br . tr)) ; Nukta's special treatment.
723 (?\e$(5"p
\e(B 20 (tr . br
))
724 (?
\e$
(5"q\e(B 20 (br . tr))
725 (?\e$(5"r
\e(B 20 (br . tr
))
928 ;; Determine composition priority and rule of the array of Glyphs.
929 ;; Sort the glyphs with their priority.
932 ;;(devanagari-reorder-glyph-for-composition '[?\e$(5"5\e(B ?\e$(5!X\e(B ?\e$(5![\e(B])
933 ;; => ((446680 0) (446773 0) (446683 50 (ml . mr)))
(defun devanagari-reorder-glyph-for-composition (glyph-alist)
  "Return the composition rules for the glyphs in GLYPH-ALIST, by priority.
GLYPH-ALIST is a vector of glyphs.  Each glyph is looked up in
`devanagari-composition-rules' and the entries found are returned as a
list sorted by ascending priority (the second element of each entry).
Each glyph needs an entry with a numeric priority; otherwise the
comparison signals an error."
  (let* ((pos 0)
         (ordered-glyphs '()))
    (while (< pos (length glyph-alist))
      (let* ((glyph (aref glyph-alist pos)))
        (setq pos (1+ pos))
        (setq ordered-glyphs
              (append ordered-glyphs
                      (list (assq glyph devanagari-composition-rules))))))
    (sort ordered-glyphs
          (function (lambda (x y) (< (car (cdr x)) (car (cdr y))))))))
945 ;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e2\e$(6!XP"5@![\e1\e(B"
(defun devanagari-compose-to-one-glyph (devanagari-string)
  "Compose the glyphs of DEVANAGARI-STRING into one composite glyph.
The glyphs are ordered with `devanagari-reorder-glyph-for-composition';
the first one becomes the base and each following one is attached with
the rule from its table entry, or with (mr . ml) when the entry has
none.  Returns a string."
  (let* ((o-glyph-list (devanagari-reorder-glyph-for-composition
                        (string-to-vector devanagari-string)))
         ;; List of glyphs to be composed.
         (cmp-glyph-list (list (car (car o-glyph-list))))
         (o-glyph-list (cdr o-glyph-list)))
    (while o-glyph-list
      (let* ((o-glyph (car o-glyph-list))
             (glyph (if (< 2 (length o-glyph))
                        ;; composition with a specified rule
                        (list (car (cdr (cdr o-glyph))) (car o-glyph))
                      ;; default composition
                      (list '(mr . ml) (car o-glyph)))))
        (setq o-glyph-list (cdr o-glyph-list))
        (setq cmp-glyph-list (append cmp-glyph-list glyph))))
    ;; Before applying compose-chars, convert glyphs to
    ;; 1-column width if possible.
    (setq cmp-glyph-list (devanagari-wide-to-narrow cmp-glyph-list))
    (if (= (length cmp-glyph-list) 1)
        (char-to-string (car cmp-glyph-list))
      (apply 'compose-chars cmp-glyph-list))))
970 ;; Phase 2.5 Convert Appropriate Character to 1-column shape.
972 ;; This is temporary and should be removed out when Emacs supports
973 ;; variable width characters.
975 ;; This will convert the composing glyphs (2 column glyphs)
976 ;; to narrow (1 column) glyphs if they exist.
978 ;; devanagari-wide-to-narrow-old converts glyphs simply.
979 ;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs
980 ;; with 2 column base-glyph.
982 ;; Execution Examples
983 ;;(devanagari-wide-to-narrow '(446680 446773 (ml . mr) 446683))
984 ;;(devanagari-wide-to-narrow '(?\e$(5!6\e(B (ml . ml) 446773 (tc . mr) 446683))
(defun devanagari-wide-to-narrow (src-list)
  "Return a copy of SRC-LIST with glyphs replaced by 1-column glyphs.
Every numeric element that has an entry in `devanagari-1-column-char'
is replaced by the value of that entry; all other elements (such as
composition rules) are kept as they are."
  (if (null src-list) '()
    (let* ((item (car src-list))
           (narrow (and (numberp item)
                        (cdr (assq item devanagari-1-column-char)))))
      (cons (or narrow item)
            (devanagari-wide-to-narrow (cdr src-list))))))
995 ;; This function is made obsolete for now, because Emacs now supports
996 ;; attaching a 1-column character at the center of a 2-column char. However,
997 ;; there are still problems attempting to attach Halant or Nukta sign
998 ;; at the non-vowel consonant. This problem can not be solved until
999 ;; Emacs supports attaching the glyph at `temporary-preserved metric'.
(defun devanagari-wide-to-narrow-old (src-list)
  "Convert the glyphs of SRC-LIST to 1-column glyphs where possible.
The first element of SRC-LIST is the base glyph; the rest is handed to
`devanagari-wide-to-narrow-iter'.  Signals an error when SRC-LIST is
empty."
  (if (null src-list) (progn (error "devanagari-wide-to-narrow error") nil)
    (let* ((base-glyph (cdr (assq (car src-list) devanagari-1-column-char)))
           (wide-base-glyph nil)
           (apply-glyph-list (cdr src-list)))
      ;; No 1-column form: keep the original base glyph and remember
      ;; that it is a 2-column one.
      (if (null base-glyph)
          (progn
            (setq wide-base-glyph t)
            (setq base-glyph (car src-list))))
      (cons base-glyph
            (devanagari-wide-to-narrow-iter apply-glyph-list
                                            wide-base-glyph)))))
1014 ;; Convert apply-glyph-list from 2-column to 1-column.
1015 ;; wide-base-glyph is t when base-glyph is 2-column.
1016 ;; When apply-glyph is put at the top or bottom of 2-column base-glyph,
1017 ;; they must be 2-column glyph, too. Otherwise, they will be
1018 ;; converted to 1-column glyph if possible.
(defun devanagari-wide-to-narrow-iter (apply-glyph-list wide-base-glyph)
  "Convert APPLY-GLYPH-LIST from 2-column to 1-column glyphs.
APPLY-GLYPH-LIST alternates composition rules and glyphs.
WIDE-BASE-GLYPH is non-nil when the base glyph is a 2-column one; a
glyph attached to the top or bottom of such a base is kept as it is.
Other glyphs are replaced by their entry in `devanagari-1-column-char'
when they have one."
  (if (< (length apply-glyph-list) 2) '()
    (let* ((apply-dir (car apply-glyph-list))
           (apply-glyph (car (cdr apply-glyph-list)))
           (apply-rest (cdr (cdr apply-glyph-list)))
           (put-t-or-b (member (car apply-dir) '(tl tc tr bl bc br)))
           (narrow-glyph (cdr (assq apply-glyph devanagari-1-column-char))))
      (cons apply-dir
            (cons (if (or (and wide-base-glyph put-t-or-b)
                          (null narrow-glyph))
                      apply-glyph
                    narrow-glyph)
                  (devanagari-wide-to-narrow-iter apply-rest
                                                  wide-base-glyph))))))
(defun devanagari-compose-string (str)
  "Return STR with each composable Devanagari unit composed.
Every match of `devanagari-composite-glyph-unit-examine' is converted
with `char-to-glyph-devanagari' and then
`devanagari-compose-to-one-glyph'; text between matches is copied
unchanged."
  (let ((src str) (dst "") match-b match-e)
    (while (string-match devanagari-composite-glyph-unit-examine src)
      (setq match-b (match-beginning 0) match-e (match-end 0))
      (setq dst
            (concat dst
                    (substring src 0 match-b)
                    (devanagari-compose-to-one-glyph
                     (char-to-glyph-devanagari
                      (substring src match-b match-e)))))
      ;; Continue with the text after the unit just composed.
      (setq src (substring src match-e)))
    (setq dst (concat dst src))
    dst))
(defun devanagari-compose-region (from to)
  "Compose the Devanagari characters in the region between FROM and TO.
Every match of `devanagari-composite-glyph-unit-examine' is replaced by
the result of composing it.  Interactively, the current region is used."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward devanagari-composite-glyph-unit-examine nil t)
      (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
             (cmps (devanagari-compose-to-one-glyph
                    (char-to-glyph-devanagari
                     (buffer-substring match-b match-e)))))
        (delete-region match-b match-e)
        (insert cmps)))))
1069 ;; Decomposition of composite font.
(defun devanagari-normalize-narrow-glyph (charlist)
  "Return a copy of CHARLIST with 1-column glyphs replaced by wide ones.
An element that appears as the value of an entry in
`devanagari-1-column-char' is replaced by the key of that entry; other
elements are kept as they are."
  (if (null charlist) nil
    (let ((wide-char (car (rassoc (car charlist) devanagari-1-column-char))))
      (cons (if (null wide-char) (car charlist) wide-char)
            (devanagari-normalize-narrow-glyph (cdr charlist))))))
1078 (defvar devanagari-decomposition-rules
1084 (defun devanagari-reorder-glyph-for-decomposition (glyphlist)
1085 "This function re-orders glyph list.
"
1088 (let ((xx (assoc x devanagari-decomposition-rules))
1089 (yy (assoc y devanagari-decomposition-rules)))
1090 (if (null xx) (setq xx 0))
1091 (if (null yy) (setq yy 0))
1094 (defun devanagari-decompose-char (char)
1095 "This function decomposes one Devanagari composite character to
1096 basic Devanagari character.
"
1097 (let ((glyphlist (decompose-composite-char char)))
1098 (if (not (listp glyphlist))
1099 (setq glyphlist (list glyphlist)))
1100 (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist))
1101 (mapconcat '(lambda (x) (let ((char (get-char-code-property
1103 (if (null char) (char-to-string x) char)))
1104 (devanagari-reorder-glyph-for-decomposition glyphlist)
(defun devanagari-decompose-string (str)
  "Decompose the Devanagari glyph string STR into a basic character string.
Each character of STR is passed to `devanagari-decompose-char' and the
results are concatenated into the returned string."
  (let ((src str) (dst ""))
    (while (not (equal src ""))
      (let* ((char (string-to-char src))
             (clen (char-bytes char)))
        (setq src (substring src clen))
        (setq dst (concat dst
                          (devanagari-decompose-char char)))))
    dst))
1121 (defun devanagari-decompose-region (from to)
1124 (narrow-to-region from to)
1125 (goto-char (point-min))
1126 (while (re-search-forward ".
" nil t)
1127 (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
1128 (decmps (devanagari-decompose-string (buffer-substring match-b match-e))))
1134 ;; For pre-write and post-read conversion
(defun devanagari-compose-from-is13194-region (from to)
  "Compose IS 13194 characters in the region to Devanagari characters.
The region between FROM and TO is first converted with
`indian-to-devanagari-region' and then composed with
`devanagari-compose-region'.  Interactively, the current region is used."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (indian-to-devanagari-region (point-min) (point-max))
    (devanagari-compose-region (point-min) (point-max))))
(defun devanagari-decompose-to-is13194-region (from to)
  "Decompose Devanagari characters in the region to IS 13194 characters.
The region between FROM and TO is first decomposed with
`devanagari-decompose-region' and then converted with
`devanagari-to-indian-region'.  Interactively, the current region is used."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (devanagari-decompose-region (point-min) (point-max))
    (devanagari-to-indian-region (point-min) (point-max))))
1155 (provide 'language/devan-util)
1157 ;;; Local Variables:
1158 ;;; generated-autoload-file: "..
/loaddefs.el
"
1160 ;;; devan-util.el ends here