Commit | Line | Data |
---|---|---|
cdbe6a03 | 1 | ;;; indian.el --- Indian languages support -*- coding: utf-8; -*- |
4ed46869 | 2 | |
ab422c4d | 3 | ;; Copyright (C) 1997, 1999, 2001-2013 Free Software Foundation, Inc. |
5df4f04c | 4 | ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
eaa61218 KH |
5 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
6 | ;; Registration Number H14PRO021 | |
4ed46869 | 7 | |
cdbe6a03 KH |
8 | ;; Maintainer: Kenichi Handa <handa@m17n.org> |
9 | ;; KAWABATA, Taichi <kawabata@m17n.org> | |
51896ebc | 10 | ;; Keywords: multilingual, i18n, Indian |
4ed46869 KH |
11 | |
12 | ;; This file is part of GNU Emacs. | |
13 | ||
4936186e | 14 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 15 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
16 | ;; the Free Software Foundation, either version 3 of the License, or |
17 | ;; (at your option) any later version. | |
4ed46869 KH |
18 | |
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 25 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
26 | |
27 | ;;; Commentary: | |
28 | ||
cdbe6a03 KH |
29 | ;; This file contains definitions of Indian language environments, and |
30 | ;; setups for displaying the scripts used there. | |
4ed46869 KH |
31 | |
32 | ;;; Code: | |
33 | ||
e1915ab3 KH |
34 | (define-coding-system 'in-is13194-devanagari |
35 | "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." | |
36 | :coding-type 'iso-2022 | |
37 | :mnemonic ?D | |
38 | :designation [ascii indian-is13194 nil nil] | |
39 | :charset-list '(ascii indian-is13194) | |
5e145d59 KH |
40 | :post-read-conversion 'in-is13194-post-read-conversion |
41 | :pre-write-conversion 'in-is13194-pre-write-conversion) | |
e1915ab3 KH |
42 | |
43 | (define-coding-system-alias 'devanagari 'in-is13194-devanagari) | |
8805e649 | 44 | |
cdbe6a03 KH |
45 | (set-language-info-alist |
46 | "Devanagari" '((charset unicode) | |
47 | (coding-system utf-8) | |
48 | (coding-priority utf-8) | |
7019c177 | 49 | (input-method . "devanagari-aiba") |
cdbe6a03 KH |
50 | (documentation . "\ |
51 | Such languages using Devanagari script as Hindi and Marathi | |
52 | are supported in this language environment.")) | |
53 | '("Indian")) | |
54 | ||
55 | (set-language-info-alist | |
56 | "Bengali" '((charset unicode) | |
57 | (coding-system utf-8) | |
58 | (coding-priority utf-8) | |
59 | (input-method . "bengali-itrans") | |
60 | (documentation . "\ | |
61 | Such languages using Bengali script as Bengali and Assamese | |
62 | are supported in this language environment.")) | |
63 | '("Indian")) | |
64 | ||
65 | (set-language-info-alist | |
66 | "Punjabi" '((charset unicode) | |
67 | (coding-system utf-8) | |
68 | (coding-priority utf-8) | |
69 | (input-method . "punjabi-itrans") | |
70 | (documentation . "\ | |
71 | North Indian language Punjabi is supported in this language environment.")) | |
72 | '("Indian")) | |
73 | ||
74 | (set-language-info-alist | |
75 | "Gujarati" '((charset unicode) | |
76 | (coding-system utf-8) | |
77 | (coding-priority utf-8) | |
78 | (input-method . "gujarati-itrans") | |
79 | (documentation . "\ | |
80 | North Indian language Gujarati is supported in this language environment.")) | |
81 | '("Indian")) | |
82 | ||
83 | (set-language-info-alist | |
84 | "Oriya" '((charset unicode) | |
85 | (coding-system utf-8) | |
86 | (coding-priority utf-8) | |
87 | (input-method . "oriya-itrans") | |
88 | (documentation . "\ | |
89 | Such languages using Oriya script as Oriya, Khonti, and Santali | |
90 | are supported in this language environment.")) | |
91 | '("Indian")) | |
92 | ||
93 | (set-language-info-alist | |
94 | "Tamil" '((charset unicode) | |
95 | (coding-system utf-8) | |
96 | (coding-priority utf-8) | |
97 | (input-method . "tamil-itrans") | |
98 | (documentation . "\ | |
99 | South Indian Language Tamil is supported in this language environment.")) | |
100 | '("Indian")) | |
101 | ||
102 | (set-language-info-alist | |
103 | "Telugu" '((charset unicode) | |
104 | (coding-system utf-8) | |
105 | (coding-priority utf-8) | |
106 | (input-method . "telugu-itrans") | |
107 | (documentation . "\ | |
108 | South Indian Language Telugu is supported in this language environment.")) | |
109 | '("Indian")) | |
110 | ||
111 | (set-language-info-alist | |
112 | "Kannada" '((charset unicode) | |
113 | (coding-system utf-8) | |
114 | (coding-priority utf-8) | |
115 | (input-method . "kannada-itrans") | |
116 | (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ") | |
117 | (documentation . "\ | |
118 | Kannada language and script is supported in this language | |
119 | environment.")) | |
120 | '("Indian")) | |
121 | ||
122 | (set-language-info-alist | |
123 | "Malayalam" '((charset unicode) | |
124 | (coding-system utf-8) | |
125 | (coding-priority utf-8) | |
126 | (input-method . "malayalam-itrans") | |
127 | (documentation . "\ | |
128 | South Indian language Malayalam is supported in this language environment.")) | |
129 | '("Indian")) | |
098d86d5 | 130 | |
3ff3655c KH |
131 | ;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is |
132 | ;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING). | |
133 | ||
134 | (defun indian-compose-regexp (regexp table) | |
135 | (let ((case-fold-search nil)) | |
136 | (dolist (elt table) | |
137 | (setq regexp (replace-regexp-in-string (car elt) (cdr elt) regexp t t))) | |
138 | regexp)) | |
139 | ||
ef19e2f3 | 140 | (defconst devanagari-composable-pattern |
f758cd2a | 141 | (let ((table |
42763dda KH |
142 | '(("a" . "[\u0900-\u0902]") ; vowel modifier (above) |
143 | ("A" . "\u0903") ; vowel modifier (post) | |
144 | ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel | |
145 | ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant | |
146 | ("R" . "\u0930") ; RA | |
147 | ("n" . "\u093C") ; NUKTA | |
148 | ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign | |
149 | ("H" . "\u094D") ; HALANT | |
150 | ("s" . "[\u0951-\u0952]") ; stress sign | |
151 | ("t" . "[\u0953-\u0954]") ; accent | |
152 | ("N" . "\u200C") ; ZWNJ | |
153 | ("J" . "\u200D") ; ZWJ | |
154 | ("X" . "[\u0900-\u097F]")))) ; all coverage | |
f758cd2a KH |
155 | (indian-compose-regexp |
156 | (concat | |
157 | ;; syllables with an independent vowel, or | |
42763dda | 158 | "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|" |
f758cd2a | 159 | ;; consonant-based syllables, or |
42763dda | 160 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|" |
f758cd2a KH |
161 | ;; special consonant form, or |
162 | "JHR\\|" | |
163 | ;; any other singleton characters | |
164 | "X") | |
165 | table)) | |
ef19e2f3 KH |
166 | "Regexp matching a composable sequence of Devanagari characters.") |
167 | ||
42763dda KH |
168 | (defconst bengali-composable-pattern |
169 | (let ((table | |
170 | '(("a" . "\u0981") ; SIGN CANDRABINDU | |
171 | ("A" . "[\u0982-\u0983]") ; SIGN ANUSVARA .. VISARGA | |
172 | ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel | |
173 | ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant | |
174 | ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA | |
175 | ("R" . "[\u09B0\u09F0]") ; RA | |
176 | ("n" . "\u09BC") ; NUKTA | |
177 | ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign | |
178 | ("H" . "\u09CD") ; HALANT | |
179 | ("T" . "\u09CE") ; KHANDA TA | |
180 | ("N" . "\u200C") ; ZWNJ | |
181 | ("J" . "\u200D") ; ZWJ | |
182 | ("X" . "[\u0980-\u09FF]")))) ; all coverage | |
183 | (indian-compose-regexp | |
184 | (concat | |
185 | ;; syllables with an independent vowel, or | |
186 | "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" | |
187 | ;; consonant-based syllables, or | |
188 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|" | |
189 | ;; KHANDA TA, optionally preceded by RA + HALANT, or | |
190 | "\\(?:RH\\)?T\\|" | |
191 | ;; special consonant form, or | |
192 | "JHB\\|" | |
193 | ;; any other singleton characters | |
194 | "X") | |
195 | table)) | |
196 | "Regexp matching a composable sequence of Bengali characters.") | |
197 | ||
198 | (defconst gurmukhi-composable-pattern | |
199 | (let ((table | |
ece33a6e KH |
200 | '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI |
201 | ("A" . "\u0A03") ; SIGN VISARGA | |
42763dda KH |
202 | ("V" . "[\u0A05-\u0A14]") ; independent vowel |
203 | ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant | |
ece33a6e | 204 | ("Y" . "[\u0A2F-\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA |
42763dda KH |
205 | ("n" . "\u0A3C") ; NUKTA |
206 | ("v" . "[\u0A3E-\u0A4C]") ; vowel sign | |
207 | ("H" . "\u0A4D") ; VIRAMA | |
42763dda KH |
208 | ("N" . "\u200C") ; ZWNJ |
209 | ("J" . "\u200D") ; ZWJ | |
210 | ("X" . "[\u0A00-\u0A7F]")))) ; all coverage | |
211 | (indian-compose-regexp | |
212 | (concat | |
213 | ;; consonant-based syllables, or | |
214 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" | |
215 | ;; syllables with an independent vowel, or | |
216 | "Vn?\\(?:J?HY\\)?v*n?a?A?\\|" | |
217 | ;; special consonant form, or | |
218 | "JHY\\|" | |
219 | ;; any other singleton characters | |
220 | "X") | |
221 | table)) | |
222 | "Regexp matching a composable sequence of Gurmukhi characters.") | |
223 | ||
224 | (defconst gujarati-composable-pattern | |
225 | (let ((table | |
226 | '(("a" . "[\u0A81-\u0A82]") ; SIGN CANDRABINDU .. ANUSVARA | |
b106e264 | 227 | ("A" . "\u0A83") ; SIGN VISARGA |
42763dda KH |
228 | ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel |
229 | ("C" . "[\u0A95-\u0AB9]") ; consonant | |
230 | ("R" . "\u0AB0") ; RA | |
231 | ("n" . "\u0ABC") ; NUKTA | |
232 | ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign | |
233 | ("H" . "\u0ACD") ; VIRAMA | |
234 | ("N" . "\u200C") ; ZWNJ | |
235 | ("J" . "\u200D") ; ZWJ | |
236 | ("X" . "[\u0A80-\u0AFF]")))) ; all coverage | |
237 | (indian-compose-regexp | |
238 | (concat | |
239 | ;; syllables with an independent vowel, or | |
240 | "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|" | |
241 | ;; consonant-based syllables, or | |
86a366f4 | 242 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" |
42763dda KH |
243 | ;; special consonant form, or |
244 | "JHR\\|" | |
245 | ;; any other singleton characters | |
246 | "X") | |
247 | table)) | |
248 | "Regexp matching a composable sequence of Gujarati characters.") | |
249 | ||
250 | (defconst oriya-composable-pattern | |
251 | (let ((table | |
252 | '(("a" . "\u0B01") ; SIGN CANDRABINDU | |
253 | ("A" . "[\u0B02-\u0B03]") ; SIGN ANUSVARA .. VISARGA | |
254 | ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel | |
255 | ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant | |
256 | ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form | |
86a366f4 | 257 | ("R" . "\u0B30") ; RA |
42763dda | 258 | ("n" . "\u0B3C") ; NUKTA |
771533aa | 259 | ("v" . "[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign |
42763dda KH |
260 | ("H" . "\u0B4D") ; VIRAMA |
261 | ("N" . "\u200C") ; ZWNJ | |
262 | ("J" . "\u200D") ; ZWJ | |
263 | ("X" . "[\u0B00-\u0B7F]")))) ; all coverage | |
264 | (indian-compose-regexp | |
265 | (concat | |
266 | ;; syllables with an independent vowel, or | |
267 | "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" | |
268 | ;; consonant-based syllables, or | |
86a366f4 | 269 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" |
42763dda KH |
270 | ;; special consonant form, or |
271 | "JHB\\|" | |
272 | ;; any other singleton characters | |
273 | "X") | |
274 | table)) | |
275 | "Regexp matching a composable sequence of Oriya characters.") | |
276 | ||
ef19e2f3 | 277 | (defconst tamil-composable-pattern |
42763dda KH |
278 | (let ((table |
279 | '(("a" . "\u0B82") ; SIGN ANUSVARA | |
280 | ("V" . "[\u0B85-\u0B94]") ; independent vowel | |
281 | ("C" . "[\u0B95-\u0BB9]") ; consonant | |
771533aa | 282 | ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign |
42763dda KH |
283 | ("H" . "\u0BCD") ; VIRAMA |
284 | ("N" . "\u200C") ; ZWNJ | |
285 | ("J" . "\u200D") ; ZWJ | |
286 | ("X" . "[\u0B80-\u0BFF]")))) ; all coverage | |
287 | (indian-compose-regexp | |
288 | (concat | |
289 | ;; consonant-based syllables, or | |
86a366f4 | 290 | "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|" |
42763dda KH |
291 | ;; syllables with an independent vowel, or |
292 | "Vv*a?\\|" | |
293 | ;; any other singleton characters | |
294 | "X") | |
295 | table)) | |
ef19e2f3 KH |
296 | "Regexp matching a composable sequence of Tamil characters.") |
297 | ||
42763dda KH |
298 | (defconst telugu-composable-pattern |
299 | (let ((table | |
300 | '(("a" . "[\u0C01-\u0C03]") ; SIGN CANDRABINDU .. VISARGA | |
301 | ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel | |
302 | ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant | |
303 | ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign | |
86a366f4 | 304 | ("H" . "\u0C4D") ; VIRAMA |
42763dda KH |
305 | ("N" . "\u200C") ; ZWNJ |
306 | ("J" . "\u200D") ; ZWJ | |
307 | ("X" . "[\u0C00-\u0C7F]")))) ; all coverage | |
308 | (indian-compose-regexp | |
309 | (concat | |
310 | ;; consonant-based syllables, or | |
86a366f4 | 311 | "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|" |
42763dda KH |
312 | ;; syllables with an independent vowel, or |
313 | "V\\(?:J?HC\\)?v*a?\\|" | |
314 | ;; special consonant form, or | |
315 | "JHC\\|" | |
316 | ;; any other singleton characters | |
317 | "X") | |
318 | table)) | |
319 | "Regexp matching a composable sequence of Telugu characters.") | |
320 | ||
ef19e2f3 | 321 | (defconst kannada-composable-pattern |
42763dda KH |
322 | (let ((table |
323 | '(("A" . "[\u0C82-\u0C83]") ; SIGN ANUSVARA .. VISARGA | |
324 | ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel | |
325 | ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant | |
86a366f4 | 326 | ("R" . "\u0CB0") ; RA |
42763dda KH |
327 | ("n" . "\u0CBC") ; NUKTA |
328 | ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign | |
329 | ("H" . "\u0CCD") ; VIRAMA | |
330 | ("N" . "\u200C") ; ZWNJ | |
331 | ("J" . "\u200D") ; ZWJ | |
332 | ("X" . "[\u0C80-\u0CFF]")))) ; all coverage | |
333 | (indian-compose-regexp | |
334 | (concat | |
335 | ;; syllables with an independent vowel, or | |
336 | "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|" | |
337 | ;; consonant-based syllables, or | |
86a366f4 | 338 | "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|" |
42763dda | 339 | ;; special consonant form, or |
86a366f4 | 340 | "JHC\\|" |
42763dda KH |
341 | ;; any other singleton characters |
342 | "X") | |
343 | table)) | |
ef19e2f3 KH |
344 | "Regexp matching a composable sequence of Kannada characters.") |
345 | ||
346 | (defconst malayalam-composable-pattern | |
f758cd2a | 347 | (let ((table |
42763dda KH |
348 | '(("A" . "[\u0D02-\u0D03]") ; SIGN ANUSVARA .. VISARGA |
349 | ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel | |
f758cd2a | 350 | ("C" . "[\u0D15-\u0D39]") ; consonant |
42763dda | 351 | ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA |
771533aa | 352 | ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]") ; postbase matra |
86a366f4 | 353 | ("H" . "\u0D4D") ; SIGN VIRAMA |
69db641d KH |
354 | ("N" . "\u200C") ; ZWNJ |
355 | ("J" . "\u200D") ; ZWJ | |
f758cd2a | 356 | ("X" . "[\u0D00-\u0D7F]")))) ; all coverage |
3ff3655c KH |
357 | (indian-compose-regexp |
358 | (concat | |
f758cd2a | 359 | ;; consonant-based syllables, or |
86a366f4 | 360 | "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|" |
42763dda KH |
361 | ;; syllables with an independent vowel, or |
362 | "V\\(?:J?HY\\)?v*?A?\\|" | |
f758cd2a | 363 | ;; special consonant form, or |
42763dda | 364 | "JHY\\|" |
3ff3655c KH |
365 | ;; any other singleton characters |
366 | "X") | |
367 | table)) | |
ef19e2f3 KH |
368 | "Regexp matching a composable sequence of Malayalam characters.") |
369 | ||
9b87bff0 | 370 | (let ((script-regexp-alist |
f758cd2a | 371 | `((devanagari . ,devanagari-composable-pattern) |
42763dda KH |
372 | (bengali . ,bengali-composable-pattern) |
373 | (gurmukhi . ,gurmukhi-composable-pattern) | |
374 | (gujarati . ,gujarati-composable-pattern) | |
375 | (oriya . ,oriya-composable-pattern) | |
376 | (tamil . ,tamil-composable-pattern) | |
377 | (telugu . ,telugu-composable-pattern) | |
378 | (kannada . ,kannada-composable-pattern) | |
3ff3655c | 379 | (malayalam . ,malayalam-composable-pattern)))) |
ef19e2f3 KH |
380 | (map-char-table |
381 | #'(lambda (key val) | |
382 | (let ((slot (assq val script-regexp-alist))) | |
383 | (if slot | |
384 | (set-char-table-range | |
385 | composition-function-table key | |
386 | (list (vector (cdr slot) 0 'font-shape-gstring)))))) | |
387 | char-script-table)) | |
9b87bff0 | 388 | |
41da80b1 | 389 | (provide 'indian) |
1bec6fdb | 390 | |
4ed46869 | 391 | ;;; indian.el ends here |