Commit | Line | Data |
---|---|---|
cdbe6a03 | 1 | ;;; indian.el --- Indian languages support -*- coding: utf-8; -*- |
4ed46869 | 2 | |
114f9c96 | 3 | ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
d4877ac1 | 4 | ;; Free Software Foundation, Inc. |
114f9c96 | 5 | ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
eaa61218 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
4ed46869 | 8 | |
cdbe6a03 KH |
9 | ;; Maintainer: Kenichi Handa <handa@m17n.org> |
10 | ;; KAWABATA, Taichi <kawabata@m17n.org> | |
51896ebc | 11 | ;; Keywords: multilingual, i18n, Indian |
4ed46869 KH |
12 | |
13 | ;; This file is part of GNU Emacs. | |
14 | ||
4936186e | 15 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 16 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
17 | ;; the Free Software Foundation, either version 3 of the License, or |
18 | ;; (at your option) any later version. | |
4ed46869 KH |
19 | |
20 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
21 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 | ;; GNU General Public License for more details. | |
24 | ||
25 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 26 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
27 | |
28 | ;;; Commentary: | |
29 | ||
cdbe6a03 KH |
30 | ;; This file contains definitions of Indian language environments, and |
31 | ;; setups for displaying the scripts used there. | |
4ed46869 KH |
32 | |
33 | ;;; Code: | |
34 | ||
e1915ab3 KH |
35 | (define-coding-system 'in-is13194-devanagari |
36 | "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." | |
37 | :coding-type 'iso-2022 | |
38 | :mnemonic ?D | |
39 | :designation [ascii indian-is13194 nil nil] | |
40 | :charset-list '(ascii indian-is13194) | |
5e145d59 KH |
41 | :post-read-conversion 'in-is13194-post-read-conversion |
42 | :pre-write-conversion 'in-is13194-pre-write-conversion) | |
e1915ab3 KH |
43 | |
44 | (define-coding-system-alias 'devanagari 'in-is13194-devanagari) | |
8805e649 | 45 | |
cdbe6a03 KH |
46 | (set-language-info-alist |
47 | "Devanagari" '((charset unicode) | |
48 | (coding-system utf-8) | |
49 | (coding-priority utf-8) | |
50 | (input-method . "dev-aiba") | |
51 | (documentation . "\ | |
52 | Such languages using Devanagari script as Hindi and Marathi | |
53 | are supported in this language environment.")) | |
54 | '("Indian")) | |
55 | ||
56 | (set-language-info-alist | |
57 | "Bengali" '((charset unicode) | |
58 | (coding-system utf-8) | |
59 | (coding-priority utf-8) | |
60 | (input-method . "bengali-itrans") | |
61 | (documentation . "\ | |
62 | Such languages using Bengali script as Bengali and Assamese | |
63 | are supported in this language environment.")) | |
64 | '("Indian")) | |
65 | ||
66 | (set-language-info-alist | |
67 | "Punjabi" '((charset unicode) | |
68 | (coding-system utf-8) | |
69 | (coding-priority utf-8) | |
70 | (input-method . "punjabi-itrans") | |
71 | (documentation . "\ | |
72 | North Indian language Punjabi is supported in this language environment.")) | |
73 | '("Indian")) | |
74 | ||
75 | (set-language-info-alist | |
76 | "Gujarati" '((charset unicode) | |
77 | (coding-system utf-8) | |
78 | (coding-priority utf-8) | |
79 | (input-method . "gujarati-itrans") | |
80 | (documentation . "\ | |
81 | North Indian language Gujarati is supported in this language environment.")) | |
82 | '("Indian")) | |
83 | ||
84 | (set-language-info-alist | |
85 | "Oriya" '((charset unicode) | |
86 | (coding-system utf-8) | |
87 | (coding-priority utf-8) | |
88 | (input-method . "oriya-itrans") | |
89 | (documentation . "\ | |
90 | Such languages using Oriya script as Oriya, Khonti, and Santali | |
91 | are supported in this language environment.")) | |
92 | '("Indian")) | |
93 | ||
94 | (set-language-info-alist | |
95 | "Tamil" '((charset unicode) | |
96 | (coding-system utf-8) | |
97 | (coding-priority utf-8) | |
98 | (input-method . "tamil-itrans") | |
99 | (documentation . "\ | |
100 | South Indian Language Tamil is supported in this language environment.")) | |
101 | '("Indian")) | |
102 | ||
103 | (set-language-info-alist | |
104 | "Telugu" '((charset unicode) | |
105 | (coding-system utf-8) | |
106 | (coding-priority utf-8) | |
107 | (input-method . "telugu-itrans") | |
108 | (documentation . "\ | |
109 | South Indian Language Telugu is supported in this language environment.")) | |
110 | '("Indian")) | |
111 | ||
112 | (set-language-info-alist | |
113 | "Kannada" '((charset unicode) | |
114 | (coding-system utf-8) | |
115 | (coding-priority utf-8) | |
116 | (input-method . "kannada-itrans") | |
117 | (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ") | |
118 | (documentation . "\ | |
119 | Kannada language and script is supported in this language | |
120 | environment.")) | |
121 | '("Indian")) | |
122 | ||
123 | (set-language-info-alist | |
124 | "Malayalam" '((charset unicode) | |
125 | (coding-system utf-8) | |
126 | (coding-priority utf-8) | |
127 | (input-method . "malayalam-itrans") | |
128 | (documentation . "\ | |
129 | South Indian language Malayalam is supported in this language environment.")) | |
130 | '("Indian")) | |
098d86d5 | 131 | |
3ff3655c KH |
132 | (defun indian-compose-regexp (regexp table) |
133 | "Return REGEXP with its mnemonic characters replaced according to TABLE. | |
134 | TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING); entries are applied in order, case-sensitively, and replacements are inserted literally." | |
135 | (let ((case-fold-search nil) (pairs table)) | |
136 | (while pairs | |
137 | (setq regexp (replace-regexp-in-string (caar pairs) (cdar pairs) regexp t t) | |
138 | pairs (cdr pairs))) | |
139 | regexp)) | |
140 | ||
ef19e2f3 | 141 | (defconst devanagari-composable-pattern |
f758cd2a | 142 | (let ((table |
42763dda KH |
143 | '(("a" . "[\u0900-\u0902]") ; vowel modifier (above) |
144 | ("A" . "\u0903") ; vowel modifier (post) | |
145 | ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel | |
146 | ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant | |
147 | ("R" . "\u0930") ; RA | |
148 | ("n" . "\u093C") ; NUKTA | |
149 | ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign | |
150 | ("H" . "\u094D") ; HALANT | |
151 | ("s" . "[\u0951-\u0952]") ; stress sign | |
152 | ("t" . "[\u0953-\u0954]") ; accent | |
153 | ("N" . "\u200C") ; ZWNJ | |
154 | ("J" . "\u200D") ; ZWJ | |
155 | ("X" . "[\u0900-\u097F]")))) ; all coverage (fallback range used by the last alternative) | |
f758cd2a KH |
156 | (indian-compose-regexp |
157 | (concat | |
158 | ;; syllables with an independent vowel: [R H] V [n] [[J] H R] v* [n] [a] [s] [t] [A] (letters are the table mnemonics, brackets mark optional parts), or | |
42763dda | 159 | "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|" |
f758cd2a | 160 | ;; consonant-based syllables: C [n] ([J] H [J] C [n])* ending in either H [N or J] or v* [n] [a] [s] [t] [A], or |
42763dda | 161 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|" |
f758cd2a KH |
162 | ;; special consonant form (ZWJ HALANT RA), or |
163 | "JHR\\|" | |
164 | ;; any other single character in the "X" range | |
165 | "X") | |
166 | table)) | |
ef19e2f3 KH |
167 | "Regexp matching a composable sequence of Devanagari characters.") |
168 | ||
42763dda KH |
169 | (defconst bengali-composable-pattern |
170 | (let ((table | |
171 | '(("a" . "\u0981") ; SIGN CANDRABINDU | |
172 | ("A" . "[\u0982-\u0983]") ; SIGN ANUSVARA .. VISARGA | |
173 | ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel | |
174 | ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant | |
175 | ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA | |
176 | ("R" . "[\u09B0\u09F0]") ; RA | |
177 | ("n" . "\u09BC") ; NUKTA | |
178 | ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign | |
179 | ("H" . "\u09CD") ; HALANT | |
180 | ("T" . "\u09CE") ; KHANDA TA | |
181 | ("N" . "\u200C") ; ZWNJ | |
182 | ("J" . "\u200D") ; ZWJ | |
183 | ("X" . "[\u0980-\u09FF]")))) ; all coverage (fallback range used by the last alternative) | |
184 | (indian-compose-regexp | |
185 | (concat | |
186 | ;; syllables with an independent vowel: [R H] V [n] [[J] H B] v* [n] [a] [A] (letters are the table mnemonics, brackets mark optional parts), or | |
187 | "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" | |
188 | ;; consonant-based syllables: C [n] ([J] H [J] C [n])* ending in either H [N or J] or v* [N or J] [v] [a] [A], or | |
189 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|" | |
190 | ;; KHANDA TA, optionally preceded by RA + HALANT, or | |
191 | "\\(?:RH\\)?T\\|" | |
192 | ;; special consonant form (ZWJ HALANT followed by BA, YA or RA), or | |
193 | "JHB\\|" | |
194 | ;; any other single character in the "X" range | |
195 | "X") | |
196 | table)) | |
197 | "Regexp matching a composable sequence of Bengali characters.") | |
198 | ||
199 | (defconst gurmukhi-composable-pattern | |
200 | (let ((table | |
201 | '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI | |
202 | ("A" . "\u0A03") ; SIGN VISARGA | |
203 | ("V" . "[\u0A05-\u0A14]") ; independent vowel | |
204 | ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant | |
205 | ("Y" . "[\u0A2F\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA | |
206 | ("n" . "\u0A3C") ; NUKTA | |
207 | ("v" . "[\u0A3E-\u0A4C]") ; vowel sign | |
208 | ("H" . "\u0A4D") ; VIRAMA | |
209 | ;; TIPPI (U+0A70) lives in the "a" class above: a second "a" entry could never apply, since the first one already rewrites every "a" in the pattern. | |
210 | ("N" . "\u200C") ; ZWNJ | |
211 | ("J" . "\u200D") ; ZWJ | |
212 | ("X" . "[\u0A00-\u0A7F]")))) ; all coverage (fallback range used by the last alternative) | |
213 | (indian-compose-regexp | |
214 | (concat | |
215 | ;; consonant-based syllables: C [n] ([J] H [J] C [n])* ending in either H [N or J] or v* [n] [a] [A] (letters are the table mnemonics, brackets mark optional parts), or | |
216 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" | |
217 | ;; syllables with an independent vowel: V [n] [[J] H Y] v* [n] [a] [A], or | |
218 | "Vn?\\(?:J?HY\\)?v*n?a?A?\\|" | |
219 | ;; special consonant form (ZWJ VIRAMA followed by YA, RA, VA or HA), or | |
220 | "JHY\\|" | |
221 | ;; any other single character in the "X" range | |
222 | "X") | |
223 | table)) | |
224 | "Regexp matching a composable sequence of Gurmukhi characters.") | |
225 | ||
226 | (defconst gujarati-composable-pattern | |
227 | (let ((table | |
228 | '(("a" . "[\u0A81-\u0A82]") ; SIGN CANDRABINDU .. ANUSVARA | |
229 | ("A" . "\u0A83") ; SIGN VISARGA | |
230 | ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel | |
231 | ("C" . "[\u0A95-\u0AB9]") ; consonant | |
232 | ("R" . "\u0AB0") ; RA | |
233 | ("n" . "\u0ABC") ; NUKTA | |
234 | ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign | |
235 | ("H" . "\u0ACD") ; VIRAMA | |
236 | ("N" . "\u200C") ; ZWNJ | |
237 | ("J" . "\u200D") ; ZWJ | |
238 | ("X" . "[\u0A80-\u0AFF]")))) ; all coverage (fallback range used by the last alternative) | |
239 | (indian-compose-regexp | |
240 | (concat | |
241 | ;; syllables with an independent vowel: [R H] V [n] [[J] H R] v* [n] [a] [A] (letters are the table mnemonics, brackets mark optional parts), or | |
242 | "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|" | |
243 | ;; consonant-based syllables: C [n] ([J] H [J] C [n])* ending in either H [N or J] or v* [n] [a] [A], or | |
86a366f4 | 244 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" |
42763dda KH |
245 | ;; special consonant form (ZWJ VIRAMA RA), or |
246 | "JHR\\|" | |
247 | ;; any other single character in the "X" range | |
248 | "X") | |
249 | table)) | |
250 | "Regexp matching a composable sequence of Gujarati characters.") | |
251 | ||
252 | (defconst oriya-composable-pattern | |
253 | (let ((table | |
254 | '(("a" . "\u0B01") ; SIGN CANDRABINDU | |
255 | ("A" . "[\u0B02-\u0B03]") ; SIGN ANUSVARA .. VISARGA | |
256 | ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel | |
257 | ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant | |
258 | ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form | |
86a366f4 | 259 | ("R" . "\u0B30") ; RA |
42763dda | 260 | ("n" . "\u0B3C") ; NUKTA |
771533aa | 261 | ("v" . "[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign |
42763dda KH |
262 | ("H" . "\u0B4D") ; VIRAMA |
263 | ("N" . "\u200C") ; ZWNJ | |
264 | ("J" . "\u200D") ; ZWJ | |
265 | ("X" . "[\u0B00-\u0B7F]")))) ; all coverage (fallback range used by the last alternative) | |
266 | (indian-compose-regexp | |
267 | (concat | |
268 | ;; syllables with an independent vowel: [R H] V [n] [[J] H B] v* [n] [a] [A] (letters are the table mnemonics, brackets mark optional parts), or | |
269 | "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" | |
270 | ;; consonant-based syllables: C [n] ([J] H [J] C [n])* ending in either H [N or J] or v* [n] [a] [A], or | |
86a366f4 | 271 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" |
42763dda KH |
272 | ;; special consonant form (ZWJ VIRAMA followed by a consonant with below form), or |
273 | "JHB\\|" | |
274 | ;; any other single character in the "X" range | |
275 | "X") | |
276 | table)) | |
277 | "Regexp matching a composable sequence of Oriya characters.") | |
278 | ||
ef19e2f3 | 279 | (defconst tamil-composable-pattern |
42763dda KH |
280 | (let ((table |
281 | '(("a" . "\u0B82") ; SIGN ANUSVARA | |
282 | ("V" . "[\u0B85-\u0B94]") ; independent vowel | |
283 | ("C" . "[\u0B95-\u0BB9]") ; consonant | |
771533aa | 284 | ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign |
42763dda KH |
285 | ("H" . "\u0BCD") ; VIRAMA |
286 | ("N" . "\u200C") ; ZWNJ | |
287 | ("J" . "\u200D") ; ZWJ | |
288 | ("X" . "[\u0B80-\u0BFF]")))) ; all coverage (fallback range used by the last alternative) | |
289 | (indian-compose-regexp | |
290 | (concat | |
291 | ;; consonant-based syllables: C ([J] H [J] C)* ending in either H [N or J] or v* [a] (letters are the table mnemonics, brackets mark optional parts), or | |
86a366f4 | 292 | "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|" |
42763dda KH |
293 | ;; syllables with an independent vowel: V v* [a], or |
294 | "Vv*a?\\|" | |
295 | ;; any other single character in the "X" range | |
296 | "X") | |
297 | table)) | |
ef19e2f3 KH |
298 | "Regexp matching a composable sequence of Tamil characters.") |
299 | ||
42763dda KH |
300 | (defconst telugu-composable-pattern |
301 | (let ((table | |
302 | '(("a" . "[\u0C01-\u0C03]") ; SIGN CANDRABINDU .. VISARGA | |
303 | ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel | |
304 | ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant | |
305 | ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign | |
86a366f4 | 306 | ("H" . "\u0C4D") ; VIRAMA |
42763dda KH |
307 | ("N" . "\u200C") ; ZWNJ |
308 | ("J" . "\u200D") ; ZWJ | |
309 | ("X" . "[\u0C00-\u0C7F]")))) ; all coverage (fallback range used by the last alternative) | |
310 | (indian-compose-regexp | |
311 | (concat | |
312 | ;; consonant-based syllables: C ([J] H [J] C)* ending in either H [N or J] or v* [a] (letters are the table mnemonics, brackets mark optional parts), or | |
86a366f4 | 313 | "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|" |
42763dda KH |
314 | ;; syllables with an independent vowel: V [[J] H C] v* [a], or |
315 | "V\\(?:J?HC\\)?v*a?\\|" | |
316 | ;; special consonant form (ZWJ VIRAMA followed by a consonant), or | |
317 | "JHC\\|" | |
318 | ;; any other single character in the "X" range | |
319 | "X") | |
320 | table)) | |
321 | "Regexp matching a composable sequence of Telugu characters.") | |
322 | ||
ef19e2f3 | 323 | (defconst kannada-composable-pattern |
42763dda KH |
324 | (let ((table |
325 | '(("A" . "[\u0C82-\u0C83]") ; SIGN ANUSVARA .. VISARGA | |
326 | ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel | |
327 | ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant | |
86a366f4 | 328 | ("R" . "\u0CB0") ; RA |
42763dda KH |
329 | ("n" . "\u0CBC") ; NUKTA |
330 | ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign | |
331 | ("H" . "\u0CCD") ; VIRAMA | |
332 | ("N" . "\u200C") ; ZWNJ | |
333 | ("J" . "\u200D") ; ZWJ | |
334 | ("X" . "[\u0C80-\u0CFF]")))) ; all coverage (fallback range used by the last alternative) | |
335 | (indian-compose-regexp | |
336 | (concat | |
337 | ;; syllables with an independent vowel: [R H] V [n] [[J] H C] [v] [A] (letters are the table mnemonics, brackets mark optional parts; at most one vowel sign here), or | |
338 | "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|" | |
339 | ;; consonant-based syllables: C [n] ([J] H [J] C [n])* ending in either H [N or J] or v* [n] [A], or | |
86a366f4 | 340 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|" |
42763dda | 341 | ;; special consonant form (ZWJ VIRAMA followed by a consonant), or |
86a366f4 | 342 | "JHC\\|" |
42763dda KH |
343 | ;; any other single character in the "X" range |
344 | "X") | |
345 | table)) | |
ef19e2f3 KH |
346 | "Regexp matching a composable sequence of Kannada characters.") |
347 | ||
348 | (defconst malayalam-composable-pattern | |
f758cd2a | 349 | (let ((table |
42763dda KH |
350 | '(("A" . "[\u0D02-\u0D03]") ; SIGN ANUSVARA .. VISARGA |
351 | ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel | |
f758cd2a | 352 | ("C" . "[\u0D15-\u0D39]") ; consonant |
42763dda | 353 | ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA |
771533aa | 354 | ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]") ; postbase matra |
86a366f4 | 355 | ("H" . "\u0D4D") ; SIGN VIRAMA |
69db641d KH |
356 | ("N" . "\u200C") ; ZWNJ |
357 | ("J" . "\u200D") ; ZWJ | |
f758cd2a | 358 | ("X" . "[\u0D00-\u0D7F]")))) ; all coverage (fallback range used by the last alternative) |
3ff3655c KH |
359 | (indian-compose-regexp |
360 | (concat | |
f758cd2a | 361 | ;; consonant-based syllables: C ([J] H [J] C)* ending in either H [N or J] or [v] [A] (letters are the table mnemonics, brackets mark optional parts), or |
86a366f4 | 362 | "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|" |
42763dda KH |
363 | ;; syllables with an independent vowel: V [[J] H Y] v*? [A]; NOTE(review): "v*?" is the non-greedy form of "v*" while the consonant branch uses "v?" -- confirm which is intended, or |
364 | "V\\(?:J?HY\\)?v*?A?\\|" | |
f758cd2a | 365 | ;; special consonant form (ZWJ VIRAMA followed by YA, RA, LA or VA), or |
42763dda | 366 | "JHY\\|" |
3ff3655c KH |
367 | ;; any other single character in the "X" range |
368 | "X") | |
369 | table)) | |
ef19e2f3 KH |
370 | "Regexp matching a composable sequence of Malayalam characters.") |
371 | ||
9b87bff0 | 372 | (let ((script-regexp-alist |
f758cd2a | 373 | (list (cons 'devanagari devanagari-composable-pattern) |
42763dda KH |
374 | (cons 'bengali bengali-composable-pattern) |
375 | (cons 'gurmukhi gurmukhi-composable-pattern) | |
376 | (cons 'gujarati gujarati-composable-pattern) | |
377 | (cons 'oriya oriya-composable-pattern) | |
378 | (cons 'tamil tamil-composable-pattern) | |
379 | (cons 'telugu telugu-composable-pattern) | |
380 | (cons 'kannada kannada-composable-pattern) | |
3ff3655c | 381 | (cons 'malayalam malayalam-composable-pattern)))) |
ef19e2f3 KH |
382 | ;; For every range of `char-script-table' whose script has an entry above, store [PATTERN 0 font-shape-gstring] in `composition-function-table'. |
383 | (map-char-table | |
384 | (lambda (range script) | |
385 | (let ((entry (assq script script-regexp-alist))) | |
386 | (when entry | |
387 | (set-char-table-range composition-function-table range | |
388 | (list (vector (cdr entry) 0 'font-shape-gstring)))))) | |
389 | char-script-table)) | |
9b87bff0 | 390 | |
41da80b1 | 391 | (provide 'indian) |
1bec6fdb | 392 | |
cbee283d | 393 | ;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f |
4ed46869 | 394 | ;;; indian.el ends here |