| 1 | ;;; indian.el --- Indian languages support -*- coding: utf-8; -*- |
| 2 | |
| 3 | ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
| 4 | ;; Free Software Foundation, Inc. |
| 5 | ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
| 6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 7 | ;; Registration Number H14PRO021 |
| 8 | |
| 9 | ;; Maintainer: Kenichi Handa <handa@m17n.org> |
| 10 | ;; KAWABATA, Taichi <kawabata@m17n.org> |
| 11 | ;; Keywords: multilingual, i18n, Indian |
| 12 | |
| 13 | ;; This file is part of GNU Emacs. |
| 14 | |
| 15 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 16 | ;; it under the terms of the GNU General Public License as published by |
| 17 | ;; the Free Software Foundation, either version 3 of the License, or |
| 18 | ;; (at your option) any later version. |
| 19 | |
| 20 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 21 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ;; GNU General Public License for more details. |
| 24 | |
| 25 | ;; You should have received a copy of the GNU General Public License |
| 26 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | ;;; Commentary: |
| 29 | |
| 30 | ;; This file contains definitions of Indian language environments, and |
;; setups for displaying the scripts used there.
| 32 | |
| 33 | ;;; Code: |
| 34 | |
;; 8-bit coding system for IS 13194 Devanagari text.  It is declared as
;; an ISO-2022 type coding system that designates `ascii' and
;; `indian-is13194' (the remaining two designation slots are unused).
;; The post-read and pre-write conversion functions named below are not
;; defined in the visible part of this file; they must be provided
;; elsewhere before the coding system is actually used.
(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  :coding-type 'iso-2022
  :mnemonic ?D
  :designation [ascii indian-is13194 nil nil]
  :charset-list '(ascii indian-is13194)
  :post-read-conversion 'in-is13194-post-read-conversion
  :pre-write-conversion 'in-is13194-pre-write-conversion)

;; Short alias so the coding system can also be referred to as `devanagari'.
(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
| 45 | |
;; Language environments for the Indian scripts.  Every environment
;; below is registered under the "Indian" parent group, uses the
;; `unicode' charset with `utf-8' as its coding system and coding
;; priority, and names a default input method.

;; NOTE(review): all other environments here default to an "-itrans"
;; input method; confirm that an input method named "dev-aiba" is
;; actually registered.
(set-language-info-alist
 "Devanagari" '((charset unicode)
                (coding-system utf-8)
                (coding-priority utf-8)
                (input-method . "dev-aiba")
                (documentation . "\
Such languages using Devanagari script as Hindi and Marathi
are supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Bengali" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "bengali-itrans")
             (documentation . "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment."))
 '("Indian"))

;; The environment is called "Punjabi"; the matching script symbol used
;; for composition later in this file is `gurmukhi'.
(set-language-info-alist
 "Punjabi" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "punjabi-itrans")
             (documentation . "\
North Indian language Punjabi is supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Gujarati" '((charset unicode)
              (coding-system utf-8)
              (coding-priority utf-8)
              (input-method . "gujarati-itrans")
              (documentation . "\
North Indian language Gujarati is supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Oriya" '((charset unicode)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "oriya-itrans")
           (documentation . "\
Such languages using Oriya script as Oriya, Khonti, and Santali
are supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Tamil" '((charset unicode)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "tamil-itrans")
           (documentation . "\
South Indian Language Tamil is supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Telugu" '((charset unicode)
            (coding-system utf-8)
            (coding-priority utf-8)
            (input-method . "telugu-itrans")
            (documentation . "\
South Indian Language Telugu is supported in this language environment."))
 '("Indian"))
| 111 | |
;; Kannada language environment, registered under the "Indian" group.
;; It uses `utf-8' for both the coding system and the coding priority,
;; exactly like every other Indian language environment in this file
;; (it previously named `mule-utf-8', unlike all of its siblings).
(set-language-info-alist
 "Kannada" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "kannada-itrans")
             (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
             (documentation . "\
Kannada language and script is supported in this language
environment."))
 '("Indian"))
| 122 | |
;; Malayalam language environment, registered under the "Indian" group:
;; `unicode' charset, `utf-8' coding system and priority, and the
;; "malayalam-itrans" input method as the default.
(set-language-info-alist
 "Malayalam" '((charset unicode)
               (coding-system utf-8)
               (coding-priority utf-8)
               (input-method . "malayalam-itrans")
               (documentation . "\
South Indian language Malayalam is supported in this language environment."))
 '("Indian"))
| 131 | |
(defun indian-compose-regexp (regexp table)
  "Return REGEXP with its mnemonic characters expanded according to TABLE.
TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).  The
entries are applied one after another, in list order, to the result
of the previous substitution.  MNEMONIC-STRING is searched for
case-sensitively, and REPLACEMENT-STRING is inserted literally."
  (let ((case-fold-search nil)          ; "a" and "A" are distinct mnemonics
        (expanded regexp))
    (dolist (entry table expanded)
      (setq expanded
            (replace-regexp-in-string (car entry) (cdr entry)
                                      expanded
                                      t      ; FIXEDCASE: never adjust case
                                      t))))) ; LITERAL: no \\N or \\& handling
| 140 | |
(defconst devanagari-composable-pattern
  (let* ((mnemonics
          ;; One-letter mnemonic -> character class it stands for.
          '(("a" . "[\u0900-\u0902]")   ; vowel modifier (above)
            ("A" . "\u0903")            ; vowel modifier (post)
            ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
            ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
            ("R" . "\u0930")            ; RA
            ("n" . "\u093C")            ; NUKTA
            ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign
            ("H" . "\u094D")            ; HALANT
            ("s" . "[\u0951-\u0952]")   ; stress sign
            ("t" . "[\u0953-\u0954]")   ; accent
            ("N" . "\u200C")            ; ZWNJ
            ("J" . "\u200D")            ; ZWJ
            ("X" . "[\u0900-\u097F]"))) ; all coverage
         (template
          ;; Alternatives are tried in this order.
          (concat
           ;; 1. syllable built on an independent vowel
           "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
           ;; 2. syllable built on a consonant (cluster)
           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
           ;; 3. special consonant form
           "JHR\\|"
           ;; 4. any other single character of the block
           "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Devanagari characters.")
| 168 | |
(defconst bengali-composable-pattern
  (let* ((mnemonics
          ;; One-letter mnemonic -> character class it stands for.
          '(("a" . "\u0981")            ; SIGN CANDRABINDU
            ("A" . "[\u0982-\u0983]")   ; SIGN ANUSVARA .. VISARGA
            ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel
            ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
            ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA
            ("R" . "[\u09B0\u09F0]")    ; RA
            ("n" . "\u09BC")            ; NUKTA
            ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign
            ("H" . "\u09CD")            ; HALANT
            ("T" . "\u09CE")            ; KHANDA TA
            ("N" . "\u200C")            ; ZWNJ
            ("J" . "\u200D")            ; ZWJ
            ("X" . "[\u0980-\u09FF]"))) ; all coverage
         (template
          ;; Alternatives are tried in this order.
          (concat
           ;; 1. syllable built on an independent vowel
           "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
           ;; 2. syllable built on a consonant (cluster)
           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
           ;; 3. KHANDA TA, optionally preceded by RA + HALANT
           "\\(?:RH\\)?T\\|"
           ;; 4. special consonant form
           "JHB\\|"
           ;; 5. any other single character of the block
           "X")))
    (indian-compose-regexp template mnemonics))
  "Regexp matching a composable sequence of Bengali characters.")
| 198 | |
(defconst gurmukhi-composable-pattern
  (let ((table
         ;; Each mnemonic must appear only once: `indian-compose-regexp'
         ;; applies the entries in order, so a second entry for the same
         ;; letter finds nothing left to replace.  TIPPI is therefore
         ;; part of the single "a" class instead of a separate entry.
         '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
           ("A" . "\u0A03")             ; SIGN VISARGA
           ("V" . "[\u0A05-\u0A14]")    ; independent vowel
           ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant
           ("Y" . "[\u0A2F\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
           ("n" . "\u0A3C")             ; NUKTA
           ("v" . "[\u0A3E-\u0A4C]")    ; vowel sign
           ("H" . "\u0A4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A00-\u0A7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gurmukhi characters.
The regexp is built by expanding one-letter mnemonics in a template
with `indian-compose-regexp'.")
| 225 | |
(defconst gujarati-composable-pattern
  (let ((table
         '(("a" . "[\u0A81-\u0A82]")    ; SIGN CANDRABINDU .. ANUSVARA
           ("A" . "\u0A83")             ; SIGN VISARGA
           ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel
           ("C" . "[\u0A95-\u0AB9]")    ; consonant
           ("R" . "\u0AB0")             ; RA
           ("n" . "\u0ABC")             ; NUKTA
           ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign
           ("H" . "\u0ACD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A80-\u0AFF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      ;; (alternation must be written `\\|'; a bare `|' matches a
      ;; literal vertical bar in Emacs regexps)
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; special consonant form, or
      "JHR\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gujarati characters.
The regexp is built by expanding one-letter mnemonics in a template
with `indian-compose-regexp'.")
| 251 | |
(defconst oriya-composable-pattern
  (let ((table
         '(("a" . "\u0B01")             ; SIGN CANDRABINDU
           ("A" . "[\u0B02-\u0B03]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel
           ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant
           ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form
           ;; "R" is used by the template below; without this entry a
           ;; literal ASCII "R" would be left in the final regexp.
           ("R" . "\u0B30")             ; RA
           ("n" . "\u0B3C")             ; NUKTA
           ("v" . "[\u0B3E-\u0B44\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign
           ("H" . "\u0B4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B00-\u0B7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      ;; (alternation must be written `\\|'; a bare `|' matches a
      ;; literal vertical bar in Emacs regexps)
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; special consonant form, or
      "JHB\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Oriya characters.
The regexp is built by expanding one-letter mnemonics in a template
with `indian-compose-regexp'.")
| 277 | |
(defconst tamil-composable-pattern
  (let ((table
         '(("a" . "\u0B82")             ; SIGN ANUSVARA
           ("V" . "[\u0B85-\u0B94]")    ; independent vowel
           ("C" . "[\u0B95-\u0BB9]")    ; consonant
           ;; The range runs up to U+0BCC so that the two-part vowel
           ;; signs O, OO and AU (U+0BCA..U+0BCC) are included too.
           ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
           ("H" . "\u0BCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B80-\u0BFF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      ;; (alternation must be written `\\|'; a bare `|' matches a
      ;; literal vertical bar in Emacs regexps)
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vv*a?\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Tamil characters.
The regexp is built by expanding one-letter mnemonics in a template
with `indian-compose-regexp'.")
| 298 | |
(defconst telugu-composable-pattern
  (let ((table
         '(("a" . "[\u0C01-\u0C03]")    ; SIGN CANDRABINDU .. VISARGA
           ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
           ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
           ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign
           ;; Telugu VIRAMA is U+0C4D (U+0BCD is the Tamil one and lies
           ;; outside the Telugu block).
           ("H" . "\u0C4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C00-\u0C7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      ;; (alternation must be written `\\|'; a bare `|' matches a
      ;; literal vertical bar in Emacs regexps)
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
      ;; syllables with an independent vowel, or
      "V\\(?:J?HC\\)?v*a?\\|"
      ;; special consonant form, or
      "JHC\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Telugu characters.
The regexp is built by expanding one-letter mnemonics in a template
with `indian-compose-regexp'.")
| 321 | |
(defconst kannada-composable-pattern
  (let ((table
         '(("A" . "[\u0C82-\u0C83]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
           ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant
           ;; RA is named "R" because the template refers to it as "R";
           ;; when it was only defined as "B", the leading "RH" kept a
           ;; literal ASCII "R" in the final regexp.
           ("R" . "\u0CB0")             ; RA
           ("n" . "\u0CBC")             ; NUKTA
           ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
           ("H" . "\u0CCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C80-\u0CFF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
      ;; consonant-based syllables, or
      ;; (alternation must be written `\\|'; a bare `|' matches a
      ;; literal vertical bar in Emacs regexps)
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
      ;; special consonant form (ZWJ + VIRAMA + RA), or
      "JHR\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Kannada characters.
The regexp is built by expanding one-letter mnemonics in a template
with `indian-compose-regexp'.")
| 346 | |
(defconst malayalam-composable-pattern
  (let ((table
         '(("A" . "[\u0D02-\u0D03]")    ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
           ("C" . "[\u0D15-\u0D39]")    ; consonant
           ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA
           ;; Includes the two-part vowel signs O, OO and AU
           ;; (U+0D4A..U+0D4C) in addition to the other vowel signs.
           ("v" . "[\u0D3E-\u0D48\u0D4A-\u0D4C\u0D57\u0D62-\u0D63]") ; vowel sign
           ;; "H" is used throughout the template; without this entry
           ;; it would stay in the regexp as a literal ASCII "H".
           ("H" . "\u0D4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0D00-\u0D7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      ;; (zero or more "consonant + VIRAMA" links followed by a
      ;; mandatory base consonant, so this alternative can never match
      ;; the empty string and clusters of any length are accepted)
      "\\(?:CJ?HJ?\\)*C\\(?:H[NJ]?\\|v*A?\\)\\|"
      ;; syllables with an independent vowel, or
      ;; (greedy `v*': a non-greedy `v*?' at the end of an alternative
      ;; would never consume any vowel sign)
      "V\\(?:J?HY\\)?v*A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Malayalam characters.
The regexp is built by expanding one-letter mnemonics in a template
with `indian-compose-regexp'.")
| 369 | |
;; Walk `char-script-table' and, for every character (or character
;; range) whose script has a composable pattern above, install a
;; composition rule in `composition-function-table' that matches the
;; pattern and shapes the match with `font-shape-gstring'.
(let ((script-regexp-alist
       (list (cons 'devanagari devanagari-composable-pattern)
             (cons 'bengali bengali-composable-pattern)
             (cons 'gurmukhi gurmukhi-composable-pattern)
             (cons 'gujarati gujarati-composable-pattern)
             (cons 'oriya oriya-composable-pattern)
             (cons 'tamil tamil-composable-pattern)
             (cons 'telugu telugu-composable-pattern)
             (cons 'kannada kannada-composable-pattern)
             (cons 'malayalam malayalam-composable-pattern))))
  (map-char-table
   (lambda (key val)
     ;; KEY is the table key being visited, VAL the script stored for it.
     (let ((slot (assq val script-regexp-alist)))
       (when slot
         (set-char-table-range
          composition-function-table key
          (list (vector (cdr slot) 0 'font-shape-gstring))))))
   char-script-table))

(provide 'indian)
| 390 | |
| 391 | ;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f |
| 392 | ;;; indian.el ends here |