merge trunk
[bpt/emacs.git] / lisp / language / indian.el
CommitLineData
cdbe6a03 1;;; indian.el --- Indian languages support -*- coding: utf-8; -*-
4ed46869 2
114f9c96 3;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
d4877ac1 4;; Free Software Foundation, Inc.
114f9c96 5;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
eaa61218
KH
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H14PRO021
4ed46869 8
cdbe6a03
KH
9;; Maintainer: Kenichi Handa <handa@m17n.org>
10;; KAWABATA, Taichi <kawabata@m17n.org>
51896ebc 11;; Keywords: multilingual, i18n, Indian
4ed46869
KH
12
13;; This file is part of GNU Emacs.
14
4936186e 15;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 16;; it under the terms of the GNU General Public License as published by
4936186e
GM
17;; the Free Software Foundation, either version 3 of the License, or
18;; (at your option) any later version.
4ed46869
KH
19
20;; GNU Emacs is distributed in the hope that it will be useful,
21;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23;; GNU General Public License for more details.
24
25;; You should have received a copy of the GNU General Public License
4936186e 26;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869
KH
27
28;;; Commentary:
29
cdbe6a03
KH
30;; This file contains definitions of Indian language environments, and
31;; setups for displaying the scripts used there.
4ed46869
KH
32
33;;; Code:
34
e1915ab3
KH
;; Coding system for IS13194-Devanagari text, plus the shorter alias
;; `devanagari'.  The post-read and pre-write conversion functions
;; named here are not defined in this part of the file.
(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  :mnemonic ?D
  :coding-type 'iso-2022
  :charset-list '(ascii indian-is13194)
  ;; Four designation slots: ASCII, IS13194, and two left unset (nil).
  :designation [ascii indian-is13194 nil nil]
  :pre-write-conversion 'in-is13194-pre-write-conversion
  :post-read-conversion 'in-is13194-post-read-conversion)

(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
8805e649 45
cdbe6a03
KH
;; Register the Unicode-based language environments from Devanagari
;; through Telugu.  They differ only in name, default input method and
;; documentation string, so they are listed as
;; (NAME INPUT-METHOD DOCUMENTATION) and registered in list order.
(dolist (spec
         '(("Devanagari" "dev-aiba" "\
Such languages using Devanagari script as Hindi and Marathi
are supported in this language environment.")
           ("Bengali" "bengali-itrans" "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment.")
           ("Punjabi" "punjabi-itrans" "\
North Indian language Punjabi is supported in this language environment.")
           ("Gujarati" "gujarati-itrans" "\
North Indian language Gujarati is supported in this language environment.")
           ("Oriya" "oriya-itrans" "\
Such languages using Oriya script as Oriya, Khonti, and Santali
are supported in this language environment.")
           ("Tamil" "tamil-itrans" "\
South Indian Language Tamil is supported in this language environment.")
           ("Telugu" "telugu-itrans" "\
South Indian Language Telugu is supported in this language environment.")))
  (set-language-info-alist
   (car spec)
   (list (list 'charset 'unicode)
         (list 'coding-system 'utf-8)
         (list 'coding-priority 'utf-8)
         (cons 'input-method (nth 1 spec))
         (cons 'documentation (nth 2 spec)))
   ;; Every environment is registered with "Indian" as its parent.
   '("Indian")))
111
;; Kannada language environment.  The coding system is spelled `utf-8',
;; exactly as in the other language environments registered in this
;; file.  Unlike them, this one also carries a sample text.
(set-language-info-alist
 "Kannada" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "kannada-itrans")
             (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
             (documentation . "\
Kannada language and script is supported in this language
environment."))
 '("Indian"))
122
;; Malayalam language environment: Unicode charset, UTF-8 coding
;; system, and "malayalam-itrans" as the default input method.
(set-language-info-alist
 "Malayalam"
 (list (list 'charset 'unicode)
       (list 'coding-system 'utf-8)
       (list 'coding-priority 'utf-8)
       (cons 'input-method "malayalam-itrans")
       (cons 'documentation "\
South Indian language Malayalam is supported in this language environment."))
 '("Indian"))
098d86d5 131
3ff3655c
KH
(defun indian-compose-regexp (regexp table)
  "Return REGEXP with its mnemonic strings replaced according to TABLE.
TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
The entries are applied one after another in TABLE order, each to the
result of the previous replacement.  MNEMONIC-STRING is matched as a
regexp with case significant, and REPLACEMENT-STRING is inserted
literally."
  (let ((case-fold-search nil)          ; "a" and "A" are distinct mnemonics
        (expanded regexp)
        (entries table))
    (while entries
      (let ((mnemonic (car (car entries)))
            (replacement (cdr (car entries))))
        ;; FIXEDCASE and LITERAL are both t: insert REPLACEMENT as is.
        (setq expanded
              (replace-regexp-in-string mnemonic replacement expanded t t)))
      (setq entries (cdr entries)))
    expanded))
140
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst devanagari-composable-pattern
  (let ((mnemonics
         '(("a" . "[\u0900-\u0902]")    ; vowel modifier (above)
           ("A" . "\u0903")             ; vowel modifier (post)
           ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
           ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
           ("R" . "\u0930")             ; RA
           ("n" . "\u093C")             ; NUKTA
           ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign
           ("H" . "\u094D")             ; HALANT
           ("s" . "[\u0951-\u0952]")    ; stress sign
           ("t" . "[\u0953-\u0954]")    ; accent
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0900-\u097F]")))  ; all coverage
        (alternatives
         '(;; syllables with an independent vowel
           "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?"
           ;; consonant-based syllables
           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)"
           ;; special consonant form
           "JHR"
           ;; any other singleton characters
           "X")))
    ;; Join the alternatives with the regexp "or" operator, then expand.
    (indian-compose-regexp (mapconcat 'identity alternatives "\\|")
                           mnemonics))
  "Regexp matching a composable sequence of Devanagari characters.")
168
42763dda
KH
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst bengali-composable-pattern
  (let ((mnemonics
         '(("a" . "\u0981")             ; SIGN CANDRABINDU
           ("A" . "[\u0982-\u0983]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel
           ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
           ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA
           ("R" . "[\u09B0\u09F0]")     ; RA
           ("n" . "\u09BC")             ; NUKTA
           ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign
           ("H" . "\u09CD")             ; HALANT
           ("T" . "\u09CE")             ; KHANDA TA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0980-\u09FF]")))  ; all coverage
        (alternatives
         '(;; syllables with an independent vowel
           "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?"
           ;; consonant-based syllables
           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)"
           ;; another syllables with an independent vowel
           "\\(?:RH\\)?T"
           ;; special consonant form
           "JHB"
           ;; any other singleton characters
           "X")))
    ;; Join the alternatives with the regexp "or" operator, then expand.
    (indian-compose-regexp (mapconcat 'identity alternatives "\\|")
                           mnemonics))
  "Regexp matching a composable sequence of Bengali characters.")
198
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst gurmukhi-composable-pattern
  (let ((table
         ;; Each mnemonic must appear only once: the table is applied
         ;; entry by entry, so after the first "a" entry has been
         ;; expanded no "a" is left for a second one.  TIPPI therefore
         ;; lives in the same class as the other "a" signs.
         '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
           ;; A single character, not a bracket expression: a trailing
           ;; "]" here would turn every "A?" in the pattern into a
           ;; mandatory VISARGA followed by an optional "]".
           ("A" . "\u0A03")             ; SIGN VISARGA
           ("V" . "[\u0A05-\u0A14]")    ; independent vowel
           ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant
           ("Y" . "[\u0A2F\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
           ("n" . "\u0A3C")             ; NUKTA
           ("v" . "[\u0A3E-\u0A4C]")    ; vowel sign
           ("H" . "\u0A4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A00-\u0A7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gurmukhi characters.")
225
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst gujarati-composable-pattern
  (let ((table
         '(("a" . "[\u0A81-\u0A82]")    ; SIGN CANDRABINDU .. ANUSVARA
           ;; A single character, not a bracket expression: a trailing
           ;; "]" here would turn every "A?" in the pattern into a
           ;; mandatory VISARGA followed by an optional "]".
           ("A" . "\u0A83")             ; SIGN VISARGA
           ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel
           ("C" . "[\u0A95-\u0AB9]")    ; consonant
           ("R" . "\u0AB0")             ; RA
           ("n" . "\u0ABC")             ; NUKTA
           ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign
           ("H" . "\u0ACD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A80-\u0AFF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; special consonant form, or
      "JHR\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gujarati characters.")
251
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst oriya-composable-pattern
  (let ((mnemonics
         '(("a" . "\u0B01")             ; SIGN CANDRABINDU
           ("A" . "[\u0B02-\u0B03]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel
           ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant
           ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form
           ("R" . "\u0B30")             ; RA
           ("n" . "\u0B3C")             ; NUKTA
           ("v" . "[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign
           ("H" . "\u0B4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B00-\u0B7F]")))  ; all coverage
        (alternatives
         '(;; syllables with an independent vowel
           "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?"
           ;; consonant-based syllables
           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)"
           ;; special consonant form
           "JHB"
           ;; any other singleton characters
           "X")))
    ;; Join the alternatives with the regexp "or" operator, then expand.
    (indian-compose-regexp (mapconcat 'identity alternatives "\\|")
                           mnemonics))
  "Regexp matching a composable sequence of Oriya characters.")
278
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst tamil-composable-pattern
  (let ((mnemonics
         '(("a" . "\u0B82")             ; SIGN ANUSVARA
           ("V" . "[\u0B85-\u0B94]")    ; independent vowel
           ("C" . "[\u0B95-\u0BB9]")    ; consonant
           ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
           ("H" . "\u0BCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B80-\u0BFF]")))  ; all coverage
        (alternatives
         '(;; consonant-based syllables
           "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)"
           ;; syllables with an independent vowel
           "Vv*a?"
           ;; any other singleton characters
           "X")))
    ;; Join the alternatives with the regexp "or" operator, then expand.
    (indian-compose-regexp (mapconcat 'identity alternatives "\\|")
                           mnemonics))
  "Regexp matching a composable sequence of Tamil characters.")
299
42763dda
KH
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst telugu-composable-pattern
  (let ((mnemonics
         '(("a" . "[\u0C01-\u0C03]")    ; SIGN CANDRABINDU .. VISARGA
           ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
           ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
           ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign
           ("H" . "\u0C4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C00-\u0C7F]")))  ; all coverage
        (alternatives
         '(;; consonant-based syllables
           "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)"
           ;; syllables with an independent vowel
           "V\\(?:J?HC\\)?v*a?"
           ;; special consonant form
           "JHC"
           ;; any other singleton characters
           "X")))
    ;; Join the alternatives with the regexp "or" operator, then expand.
    (indian-compose-regexp (mapconcat 'identity alternatives "\\|")
                           mnemonics))
  "Regexp matching a composable sequence of Telugu characters.")
322
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst kannada-composable-pattern
  (let ((mnemonics
         '(("A" . "[\u0C82-\u0C83]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
           ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant
           ("R" . "\u0CB0")             ; RA
           ("n" . "\u0CBC")             ; NUKTA
           ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
           ("H" . "\u0CCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C80-\u0CFF]")))  ; all coverage
        (alternatives
         '(;; syllables with an independent vowel
           "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?"
           ;; consonant-based syllables
           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)"
           ;; special consonant form
           "JHC"
           ;; any other singleton characters
           "X")))
    ;; Join the alternatives with the regexp "or" operator, then expand.
    (indian-compose-regexp (mapconcat 'identity alternatives "\\|")
                           mnemonics))
  "Regexp matching a composable sequence of Kannada characters.")
347
;; The pattern is written with one-letter mnemonics, which
;; `indian-compose-regexp' expands into the character classes below.
(defconst malayalam-composable-pattern
  (let ((mnemonics
         '(("A" . "[\u0D02-\u0D03]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
           ("C" . "[\u0D15-\u0D39]")    ; consonant
           ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA
           ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]") ; postbase matra
           ("H" . "\u0D4D")             ; SIGN VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0D00-\u0D7F]")))  ; all coverage
        (alternatives
         '(;; consonant-based syllables
           "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)"
           ;; syllables with an independent vowel
           ;; NOTE(review): `v*?' is a non-greedy star, whereas the
           ;; other patterns in this file use `v*' or `v?'.  Kept
           ;; exactly as written -- confirm whether it is intended.
           "V\\(?:J?HY\\)?v*?A?"
           ;; special consonant form
           "JHY"
           ;; any other singleton characters
           "X")))
    ;; Join the alternatives with the regexp "or" operator, then expand.
    (indian-compose-regexp (mapconcat 'identity alternatives "\\|")
                           mnemonics))
  "Regexp matching a composable sequence of Malayalam characters.")
371
;; Install the composable patterns.  Walk `char-script-table'; for each
;; key whose value is one of the scripts listed here, set that key in
;; `composition-function-table' to a one-element list holding the
;; vector [PATTERN 0 font-shape-gstring].
(let ((pattern-by-script
       (list (cons 'devanagari devanagari-composable-pattern)
             (cons 'bengali bengali-composable-pattern)
             (cons 'gurmukhi gurmukhi-composable-pattern)
             (cons 'gujarati gujarati-composable-pattern)
             (cons 'oriya oriya-composable-pattern)
             (cons 'tamil tamil-composable-pattern)
             (cons 'telugu telugu-composable-pattern)
             (cons 'kannada kannada-composable-pattern)
             (cons 'malayalam malayalam-composable-pattern))))
  (map-char-table
   (lambda (range script)
     (let ((entry (assq script pattern-by-script)))
       ;; Scripts without an entry are left untouched.
       (when entry
         (set-char-table-range
          composition-function-table range
          (list (vector (cdr entry) 0 'font-shape-gstring))))))
   char-script-table))

(provide 'indian)
1bec6fdb 392
cbee283d 393;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f
4ed46869 394;;; indian.el ends here