Update copyright notices for 2013.
[bpt/emacs.git] / lisp / language / indian.el
CommitLineData
cdbe6a03 1;;; indian.el --- Indian languages support -*- coding: utf-8; -*-
4ed46869 2
ab422c4d 3;; Copyright (C) 1997, 1999, 2001-2013 Free Software Foundation, Inc.
5df4f04c 4;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
eaa61218
KH
5;; National Institute of Advanced Industrial Science and Technology (AIST)
6;; Registration Number H14PRO021
4ed46869 7
cdbe6a03
KH
8;; Maintainer: Kenichi Handa <handa@m17n.org>
9;; KAWABATA, Taichi <kawabata@m17n.org>
51896ebc 10;; Keywords: multilingual, i18n, Indian
4ed46869
KH
11
12;; This file is part of GNU Emacs.
13
4936186e 14;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 15;; it under the terms of the GNU General Public License as published by
4936186e
GM
16;; the Free Software Foundation, either version 3 of the License, or
17;; (at your option) any later version.
4ed46869
KH
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
4936186e 25;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869
KH
26
27;;; Commentary:
28
cdbe6a03
KH
29;; This file contains definitions of Indian language environments, and
30;; setups for displaying the scripts used there.
4ed46869
KH
31
32;;; Code:
33
e1915ab3
KH
;; 8-bit coding system of type `iso-2022' whose designation vector
;; lists `ascii' first and `indian-is13194' second.  The two
;; conversion hooks are referenced by symbol only; they are not
;; defined in this part of the file.
(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  :coding-type 'iso-2022
  :mnemonic ?D
  :designation [ascii indian-is13194 nil nil]
  :charset-list '(ascii indian-is13194)
  :post-read-conversion 'in-is13194-post-read-conversion
  :pre-write-conversion 'in-is13194-pre-write-conversion)

;; `devanagari' is a shorter name for the coding system defined above.
(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
8805e649 44
cdbe6a03
KH
;; Language environments for the Indian scripts.  Each one is
;; registered under the "Indian" parent group, uses the `unicode'
;; charset with `utf-8' as coding system and coding priority, and
;; names its default input method.

;; Devanagari
(set-language-info-alist
 "Devanagari"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "devanagari-aiba")
   (documentation . "\
Such languages using Devanagari script as Hindi and Marathi
are supported in this language environment."))
 '("Indian"))

;; Bengali
(set-language-info-alist
 "Bengali"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "bengali-itrans")
   (documentation . "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment."))
 '("Indian"))

;; Punjabi
(set-language-info-alist
 "Punjabi"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "punjabi-itrans")
   (documentation . "\
North Indian language Punjabi is supported in this language environment."))
 '("Indian"))

;; Gujarati
(set-language-info-alist
 "Gujarati"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "gujarati-itrans")
   (documentation . "\
North Indian language Gujarati is supported in this language environment."))
 '("Indian"))

;; Oriya
(set-language-info-alist
 "Oriya"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "oriya-itrans")
   (documentation . "\
Such languages using Oriya script as Oriya, Khonti, and Santali
are supported in this language environment."))
 '("Indian"))

;; Tamil
(set-language-info-alist
 "Tamil"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "tamil-itrans")
   (documentation . "\
South Indian Language Tamil is supported in this language environment."))
 '("Indian"))

;; Telugu
(set-language-info-alist
 "Telugu"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "telugu-itrans")
   (documentation . "\
South Indian Language Telugu is supported in this language environment."))
 '("Indian"))
110
;; Kannada language environment, registered under the "Indian" group.
;; It names `utf-8' for both the coding system and the coding
;; priority, exactly like the other Indian language environments in
;; this file (this entry previously used the older `mule-utf-8' name).
;; Unlike its siblings it also provides a `sample-text' entry.
(set-language-info-alist
 "Kannada" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "kannada-itrans")
             (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
             (documentation . "\
Kannada language and script is supported in this language
environment."))
 '("Indian"))
121
;; Malayalam language environment, registered under the "Indian"
;; group with the `unicode' charset, `utf-8' coding and the
;; "malayalam-itrans" input method.
(set-language-info-alist
 "Malayalam"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (input-method . "malayalam-itrans")
   (documentation . "\
South Indian language Malayalam is supported in this language environment."))
 '("Indian"))
098d86d5 130
3ff3655c
KH
(defun indian-compose-regexp (regexp table)
  "Return REGEXP with its mnemonic characters replaced according to TABLE.
TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).  The
entries are applied one after another, in list order, each to the
result of the previous one.  MNEMONIC-STRING is handed to
`replace-regexp-in-string' as the regexp to search for, matching is
case-sensitive, and REPLACEMENT-STRING is inserted literally."
  (let ((case-fold-search nil)          ; "a" and "A" are distinct mnemonics
        (expanded regexp))
    (dolist (pair table expanded)
      (setq expanded
            (replace-regexp-in-string (car pair) (cdr pair) expanded t t)))))
139
(defconst devanagari-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("a" . "[\u0900-\u0902]")    ; vowel modifier (above)
           ("A" . "\u0903")             ; vowel modifier (post)
           ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
           ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
           ("R" . "\u0930")             ; RA
           ("n" . "\u093C")             ; NUKTA
           ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign
           ("H" . "\u094D")             ; HALANT
           ("s" . "[\u0951-\u0952]")    ; stress sign
           ("t" . "[\u0953-\u0954]")    ; accent
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0900-\u097F]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on an independent vowel
          "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
          ;; 2. a syllable built on a consonant (cluster)
          "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
          ;; 3. the special consonant form ZWJ + HALANT + RA
          "JHR\\|"
          ;; 4. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Devanagari characters.")
167
42763dda
KH
(defconst bengali-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("a" . "\u0981")             ; SIGN CANDRABINDU
           ("A" . "[\u0982-\u0983]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel
           ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
           ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA
           ("R" . "[\u09B0\u09F0]")     ; RA
           ("n" . "\u09BC")             ; NUKTA
           ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign
           ("H" . "\u09CD")             ; HALANT
           ("T" . "\u09CE")             ; KHANDA TA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0980-\u09FF]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on an independent vowel
          "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
          ;; 2. a syllable built on a consonant (cluster)
          "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
          ;; 3. KHANDA TA, optionally preceded by RA + HALANT
          "\\(?:RH\\)?T\\|"
          ;; 4. the special consonant form ZWJ + HALANT + BA/YA/RA
          "JHB\\|"
          ;; 5. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Bengali characters.")
197
(defconst gurmukhi-composable-pattern
  (let ((table
         ;; Mnemonic letters used in the regexp template below, each
         ;; paired with the character or character class it expands to.
         '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
           ("A" . "\u0A03")             ; SIGN VISARGA
           ("V" . "[\u0A05-\u0A14]")    ; independent vowel
           ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant
           ;; The upper bound of the range needs its own `\u' escape.
           ;; Written as "\u0A2F-u0A30" the class does not denote
           ;; U+0A2F..U+0A30 (YA, RA); instead it contains a range
           ;; ending at the ASCII letter `u' plus the literal
           ;; characters `0', `A' and `3'.
           ("Y" . "[\u0A2F-\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
           ("n" . "\u0A3C")             ; NUKTA
           ("v" . "[\u0A3E-\u0A4C]")    ; vowel sign
           ("H" . "\u0A4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A00-\u0A7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
      ;; special consonant form (ZWJ + VIRAMA + YA/RA/VA/HA), or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gurmukhi characters.")
223
(defconst gujarati-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("a" . "[\u0A81-\u0A82]")    ; SIGN CANDRABINDU .. ANUSVARA
           ("A" . "\u0A83")             ; SIGN VISARGA
           ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel
           ("C" . "[\u0A95-\u0AB9]")    ; consonant
           ("R" . "\u0AB0")             ; RA
           ("n" . "\u0ABC")             ; NUKTA
           ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign
           ("H" . "\u0ACD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A80-\u0AFF]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on an independent vowel
          "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
          ;; 2. a syllable built on a consonant (cluster)
          "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
          ;; 3. the special consonant form ZWJ + VIRAMA + RA
          "JHR\\|"
          ;; 4. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Gujarati characters.")
249
(defconst oriya-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("a" . "\u0B01")             ; SIGN CANDRABINDU
           ("A" . "[\u0B02-\u0B03]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel
           ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant
           ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form
           ("R" . "\u0B30")             ; RA
           ("n" . "\u0B3C")             ; NUKTA
           ("v" . "[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign
           ("H" . "\u0B4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B00-\u0B7F]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on an independent vowel
          "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
          ;; 2. a syllable built on a consonant (cluster)
          "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
          ;; 3. the special consonant form ZWJ + VIRAMA + below-form consonant
          "JHB\\|"
          ;; 4. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Oriya characters.")
276
(defconst tamil-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("a" . "\u0B82")             ; SIGN ANUSVARA
           ("V" . "[\u0B85-\u0B94]")    ; independent vowel
           ("C" . "[\u0B95-\u0BB9]")    ; consonant
           ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
           ("H" . "\u0BCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B80-\u0BFF]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on a consonant (cluster)
          "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
          ;; 2. a syllable built on an independent vowel
          "Vv*a?\\|"
          ;; 3. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Tamil characters.")
297
42763dda
KH
(defconst telugu-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("a" . "[\u0C01-\u0C03]")    ; SIGN CANDRABINDU .. VISARGA
           ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
           ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
           ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign
           ("H" . "\u0C4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C00-\u0C7F]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on a consonant (cluster)
          "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
          ;; 2. a syllable built on an independent vowel
          "V\\(?:J?HC\\)?v*a?\\|"
          ;; 3. the special consonant form ZWJ + VIRAMA + consonant
          "JHC\\|"
          ;; 4. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Telugu characters.")
320
(defconst kannada-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("A" . "[\u0C82-\u0C83]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
           ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant
           ("R" . "\u0CB0")             ; RA
           ("n" . "\u0CBC")             ; NUKTA
           ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
           ("H" . "\u0CCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C80-\u0CFF]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on an independent vowel
          "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
          ;; 2. a syllable built on a consonant (cluster)
          "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
          ;; 3. the special consonant form ZWJ + VIRAMA + consonant
          "JHC\\|"
          ;; 4. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Kannada characters.")
345
(defconst malayalam-composable-pattern
  (let ((mnemonics
         ;; One-letter stand-ins used in TEMPLATE, each paired with the
         ;; character or character class it expands to.
         '(("A" . "[\u0D02-\u0D03]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
           ("C" . "[\u0D15-\u0D39]")    ; consonant
           ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA
           ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]") ; postbase matra
           ("H" . "\u0D4D")             ; SIGN VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0D00-\u0D7F]")))  ; all coverage
        (template
         ;; Alternatives are tried in the order they are written.
         (concat
          ;; 1. a syllable built on a consonant (cluster)
          "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
          ;; 2. a syllable built on an independent vowel
          ;; NOTE(review): `v*?' is a non-greedy star, unlike the
          ;; greedy `v*' used by the other scripts' patterns -- confirm
          ;; whether `v*' or `v?' was intended here.
          "V\\(?:J?HY\\)?v*?A?\\|"
          ;; 3. the special consonant form ZWJ + VIRAMA + YA/RA/LA/VA
          "JHY\\|"
          ;; 4. any other single character of the block
          "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Malayalam characters.")
369
;; Install the composable patterns into `composition-function-table'.
;; Every entry of `char-script-table' whose value is one of the script
;; symbols listed below gets a single composition rule of the form
;; [PATTERN 0 font-shape-gstring].
(let ((script-regexp-alist
       (list (cons 'devanagari devanagari-composable-pattern)
             (cons 'bengali bengali-composable-pattern)
             (cons 'gurmukhi gurmukhi-composable-pattern)
             (cons 'gujarati gujarati-composable-pattern)
             (cons 'oriya oriya-composable-pattern)
             (cons 'tamil tamil-composable-pattern)
             (cons 'telugu telugu-composable-pattern)
             (cons 'kannada kannada-composable-pattern)
             (cons 'malayalam malayalam-composable-pattern))))
  (map-char-table
   (lambda (range script)
     ;; PATTERN is nil for any script that has no entry in the alist.
     (let ((pattern (cdr (assq script script-regexp-alist))))
       (when pattern
         (set-char-table-range
          composition-function-table range
          (list (vector pattern 0 'font-shape-gstring))))))
   char-script-table))

(provide 'indian)
1bec6fdb 390
4ed46869 391;;; indian.el ends here