Commit | Line | Data |
---|---|---|
cdbe6a03 | 1 | ;;; indian.el --- Indian languages support -*- coding: utf-8; -*- |
4ed46869 | 2 | |
ae940284 | 3 | ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
d4877ac1 | 4 | ;; Free Software Foundation, Inc. |
ae940284 | 5 | ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
eaa61218 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
4ed46869 | 8 | |
cdbe6a03 KH |
9 | ;; Maintainer: Kenichi Handa <handa@m17n.org> |
10 | ;; KAWABATA, Taichi <kawabata@m17n.org> | |
51896ebc | 11 | ;; Keywords: multilingual, i18n, Indian |
4ed46869 KH |
12 | |
13 | ;; This file is part of GNU Emacs. | |
14 | ||
4936186e | 15 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 16 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
17 | ;; the Free Software Foundation, either version 3 of the License, or |
18 | ;; (at your option) any later version. | |
4ed46869 KH |
19 | |
20 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
21 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 | ;; GNU General Public License for more details. | |
24 | ||
25 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 26 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
27 | |
28 | ;;; Commentary: | |
29 | ||
cdbe6a03 KH |
30 | ;; This file contains definitions of Indian language environments, and |
31 | ;; setups for displaying the scripts used there. | |
4ed46869 KH |
32 | |
33 | ;;; Code: | |
34 | ||
e1915ab3 KH |
35 | (define-coding-system 'in-is13194-devanagari |
36 | "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." | |
37 | :coding-type 'iso-2022 | |
38 | :mnemonic ?D | |
39 | :designation [ascii indian-is13194 nil nil] | |
40 | :charset-list '(ascii indian-is13194) | |
5e145d59 KH |
41 | :post-read-conversion 'in-is13194-post-read-conversion |
42 | :pre-write-conversion 'in-is13194-pre-write-conversion) | |
e1915ab3 KH |
43 | |
44 | (define-coding-system-alias 'devanagari 'in-is13194-devanagari) | |
8805e649 | 45 | |
cdbe6a03 KH |
46 | (set-language-info-alist |
47 | "Devanagari" '((charset unicode) | |
48 | (coding-system utf-8) | |
49 | (coding-priority utf-8) | |
50 | (input-method . "dev-aiba") | |
51 | (documentation . "\ | |
52 | Such languages using Devanagari script as Hindi and Marathi | |
53 | are supported in this language environment.")) | |
54 | '("Indian")) | |
55 | ||
56 | (set-language-info-alist | |
57 | "Bengali" '((charset unicode) | |
58 | (coding-system utf-8) | |
59 | (coding-priority utf-8) | |
60 | (input-method . "bengali-itrans") | |
61 | (documentation . "\ | |
62 | Such languages using Bengali script as Bengali and Assamese | |
63 | are supported in this language environment.")) | |
64 | '("Indian")) | |
65 | ||
66 | (set-language-info-alist | |
67 | "Punjabi" '((charset unicode) | |
68 | (coding-system utf-8) | |
69 | (coding-priority utf-8) | |
70 | (input-method . "punjabi-itrans") | |
71 | (documentation . "\ | |
72 | North Indian language Punjabi is supported in this language environment.")) | |
73 | '("Indian")) | |
74 | ||
75 | (set-language-info-alist | |
76 | "Gujarati" '((charset unicode) | |
77 | (coding-system utf-8) | |
78 | (coding-priority utf-8) | |
79 | (input-method . "gujarati-itrans") | |
80 | (documentation . "\ | |
81 | North Indian language Gujarati is supported in this language environment.")) | |
82 | '("Indian")) | |
83 | ||
84 | (set-language-info-alist | |
85 | "Oriya" '((charset unicode) | |
86 | (coding-system utf-8) | |
87 | (coding-priority utf-8) | |
88 | (input-method . "oriya-itrans") | |
89 | (documentation . "\ | |
90 | Such languages using Oriya script as Oriya, Khonti, and Santali | |
91 | are supported in this language environment.")) | |
92 | '("Indian")) | |
93 | ||
94 | (set-language-info-alist | |
95 | "Tamil" '((charset unicode) | |
96 | (coding-system utf-8) | |
97 | (coding-priority utf-8) | |
98 | (input-method . "tamil-itrans") | |
99 | (documentation . "\ | |
100 | South Indian Language Tamil is supported in this language environment.")) | |
101 | '("Indian")) | |
102 | ||
103 | (set-language-info-alist | |
104 | "Telugu" '((charset unicode) | |
105 | (coding-system utf-8) | |
106 | (coding-priority utf-8) | |
107 | (input-method . "telugu-itrans") | |
108 | (documentation . "\ | |
109 | South Indian Language Telugu is supported in this language environment.")) | |
110 | '("Indian")) | |
111 | ||
112 | (set-language-info-alist | |
113 | "Kannada" '((charset unicode) | |
114 | (coding-system utf-8) ; utf-8, matching the other Indian language environments in this file | |
115 | (coding-priority utf-8) | |
116 | (input-method . "kannada-itrans") | |
117 | (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ") | |
118 | (documentation . "\ | |
119 | Kannada language and script is supported in this language | |
120 | environment.")) | |
121 | '("Indian")) | |
122 | ||
123 | (set-language-info-alist | |
124 | "Malayalam" '((charset unicode) | |
125 | (coding-system utf-8) | |
126 | (coding-priority utf-8) | |
127 | (input-method . "malayalam-itrans") | |
128 | (documentation . "\ | |
129 | South Indian language Malayalam is supported in this language environment.")) | |
130 | '("Indian")) | |
098d86d5 | 131 | |
3ff3655c KH |
132 | ;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is |
133 | ;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING). | |
134 | ||
135 | (defun indian-compose-regexp (regexp table) | |
136 | (let ((case-fold-search nil)) ; match mnemonics case-sensitively | |
137 | (dolist (elt table) ; apply each (MNEMONIC . REPLACEMENT) pair in TABLE order | |
138 | (setq regexp (replace-regexp-in-string (car elt) (cdr elt) regexp t t))) ; t t: keep case as-is, insert replacement literally | |
139 | regexp)) ; return the fully expanded regexp string | |
140 | ||
ef19e2f3 KH |
141 | (defconst devanagari-composable-pattern |
142 | (concat | |
143 | "\\([अ-औॠॡ][ँं]?\\)\\|[ः।]" | |
144 | "\\|\\(" | |
145 | "\\(?:\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?[क-हक़-य़]्\\)?" | |
146 | "[क-हक़-य़]\\(?:्\\|[ा-्ॢॣ]?[ंँ]?\\)?" | |
147 | "\\)") | |
148 | "Regexp matching a composable sequence of Devanagari characters.") | |
149 | ||
150 | (defconst tamil-composable-pattern | |
151 | (concat | |
152 | "\\([அ-ஔ]\\)\\|" | |
153 | "[ஂஃ]\\|" ;; vowel modifier considered independent | |
154 | "\\(\\(?:\\(?:க்ஷ\\)\\|[க-ஹ]\\)[்ா-ௌ]?\\)\\|" | |
155 | "\\(ஷ்ரீ\\)") | |
156 | "Regexp matching a composable sequence of Tamil characters.") | |
157 | ||
158 | (defconst kannada-composable-pattern | |
159 | (concat | |
160 | "\\([ಂ-ಔೠಌ]\\)\\|[ಃ]" | |
161 | "\\|\\(" | |
162 | "\\(?:\\(?:[ಕ-ಹ]್\\)?\\(?:[ಕ-ಹ]್\\)?\\(?:[ಕ-ಹ]್\\)?[ಕ-ಹ]್\\)?" | |
163 | "[ಕ-ಹ]\\(?:್\\|[ಾ-್ೕೃ]?\\)?" | |
164 | "\\)") | |
165 | "Regexp matching a composable sequence of Kannada characters.") | |
166 | ||
167 | (defconst malayalam-composable-pattern | |
3ff3655c KH |
168 | (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel |
169 | ("C" . "[\u0D15-\u0D39]") ; consonant | |
170 | ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra | |
171 | ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra | |
172 | ("b" . "[\u0D62-\u0D63]") ; belowbase matra | |
173 | ("a" . "[\u0D02-\u0D03]") ; abovebase sign | |
174 | ("H" . "്") ; virama sign | |
175 | ("N" . "\u200D") ; ZWJ | |
176 | ("J" . "\u200C") ; ZWNJ | |
177 | ("X" . "[\u0D00-\u0D7F]")))) ; all coverage | |
178 | (indian-compose-regexp | |
179 | (concat | |
180 | ;; consonant-based syllables | |
181 | "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|" | |
182 | ;; syllables with an independent vowel | |
cb14aa2e | 183 | "V\\(J?HC\\)?m?b?p?a?\\|" |
3ff3655c KH |
184 | ;; special consonant form |
185 | "JHC\\|" | |
186 | ;; any other singleton characters | |
187 | "X") | |
188 | table)) | |
ef19e2f3 KH |
189 | "Regexp matching a composable sequence of Malayalam characters.") |
190 | ||
9b87bff0 | 191 | (let ((script-regexp-alist ; alist of (SCRIPT-SYMBOL . REGEXP) |
1ff4cb98 | 192 | `((devanagari . "[\x900-\x97F\x200C\x200D]+") ; script block range plus U+200C/U+200D (ZWNJ/ZWJ) |
9b87bff0 KH |
193 | (bengali . "[\x980-\x9FF\x200C\x200D]+") |
194 | (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+") | |
195 | (gujarati . "[\xA80-\xAFF\x200C\x200D]+") | |
196 | (oriya . "[\xB00-\xB7F\x200C\x200D]+") | |
1ff4cb98 | 197 | (tamil . "[\xB80-\xBFF\x200C\x200D]+") |
9b87bff0 | 198 | (telugu . "[\xC00-\xC7F\x200C\x200D]+") |
1ff4cb98 | 199 | (kannada . "[\xC80-\xCFF\x200C\x200D]+") |
3ff3655c | 200 | (malayalam . ,malayalam-composable-pattern)))) ; only Malayalam uses its syllable-level pattern here |
ef19e2f3 KH |
201 | (map-char-table |
202 | #'(lambda (key val) | |
203 | (let ((slot (assq val script-regexp-alist))) ; VAL from char-script-table is looked up as a script symbol | |
204 | (if slot ; entries whose value is not in the alist are left untouched | |
205 | (set-char-table-range | |
206 | composition-function-table key | |
207 | (list (vector (cdr slot) 0 'font-shape-gstring)))))) ; a single rule: the regexp, 0, and font-shape-gstring | |
208 | char-script-table)) | |
9b87bff0 | 209 | |
41da80b1 | 210 | (provide 'indian) |
1bec6fdb | 211 | |
cbee283d | 212 | ;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f |
4ed46869 | 213 | ;;; indian.el ends here |