| 1 | ;;; indian.el --- Indian languages support -*- coding: utf-8; -*- |
| 2 | |
| 3 | ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
| 4 | ;; Free Software Foundation, Inc. |
| 5 | ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
| 6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 7 | ;; Registration Number H14PRO021 |
| 8 | |
| 9 | ;; Maintainer: Kenichi Handa <handa@m17n.org> |
| 10 | ;; KAWABATA, Taichi <kawabata@m17n.org> |
| 11 | ;; Keywords: multilingual, i18n, Indian |
| 12 | |
| 13 | ;; This file is part of GNU Emacs. |
| 14 | |
| 15 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 16 | ;; it under the terms of the GNU General Public License as published by |
| 17 | ;; the Free Software Foundation, either version 3 of the License, or |
| 18 | ;; (at your option) any later version. |
| 19 | |
| 20 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 21 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ;; GNU General Public License for more details. |
| 24 | |
| 25 | ;; You should have received a copy of the GNU General Public License |
| 26 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | ;;; Commentary: |
| 29 | |
| 30 | ;; This file contains definitions of Indian language environments, and |
| 31 | ;; setups for displaying the scripts used there. |
| 32 | |
| 33 | ;;; Code: |
| 34 | |
;; IS 13194 Devanagari as an ISO-2022 type coding system.  The initial
;; designation puts `ascii' in the first slot and `indian-is13194' in
;; the second, which is what the docstring describes as MSB=0 / MSB=1.
;; The two conversion hooks named below are not defined in the part of
;; this file visible here.
(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  ;; Identification.
  :mnemonic ?D
  :coding-type 'iso-2022
  ;; Character sets handled and how they are designated.
  :charset-list '(ascii indian-is13194)
  :designation [ascii indian-is13194 nil nil]
  ;; Hooks run before encoding and after decoding.
  :pre-write-conversion 'in-is13194-pre-write-conversion
  :post-read-conversion 'in-is13194-post-read-conversion)

;; Shorter alias for the coding system defined above.
(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
| 45 | |
;; Language environment "Devanagari", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "dev-aiba".
(set-language-info-alist
 "Devanagari"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "dev-aiba")
   (documentation
    . "Such languages using Devanagari script as Hindi and Marathi\n\
are supported in this language environment."))
 '("Indian"))
| 55 | |
;; Language environment "Bengali", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "bengali-itrans".
(set-language-info-alist
 "Bengali"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "bengali-itrans")
   (documentation
    . "Such languages using Bengali script as Bengali and Assamese\n\
are supported in this language environment."))
 '("Indian"))
| 65 | |
;; Language environment "Punjabi", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "punjabi-itrans".
(set-language-info-alist
 "Punjabi"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "punjabi-itrans")
   (documentation
    . "North Indian language Punjabi is supported in this language environment."))
 '("Indian"))
| 74 | |
;; Language environment "Gujarati", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "gujarati-itrans".
(set-language-info-alist
 "Gujarati"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "gujarati-itrans")
   (documentation
    . "North Indian language Gujarati is supported in this language environment."))
 '("Indian"))
| 83 | |
;; Language environment "Oriya", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "oriya-itrans".
(set-language-info-alist
 "Oriya"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "oriya-itrans")
   (documentation
    . "Such languages using Oriya script as Oriya, Khonti, and Santali\n\
are supported in this language environment."))
 '("Indian"))
| 93 | |
;; Language environment "Tamil", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "tamil-itrans".
(set-language-info-alist
 "Tamil"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "tamil-itrans")
   (documentation
    . "South Indian Language Tamil is supported in this language environment."))
 '("Indian"))
| 102 | |
;; Language environment "Telugu", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "telugu-itrans".
(set-language-info-alist
 "Telugu"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "telugu-itrans")
   (documentation
    . "South Indian Language Telugu is supported in this language environment."))
 '("Indian"))
| 111 | |
;; Language environment "Kannada", registered with "Indian" as its
;; parent.  The default input method is "kannada-itrans", and this is
;; the only Indian environment in this file that carries a sample text.
;;
;; The coding system entries name `utf-8', exactly like every other
;; language environment defined in this file.  They previously named
;; `mule-utf-8', which made Kannada the odd one out.
(set-language-info-alist
 "Kannada" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "kannada-itrans")
             (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
             (documentation . "\
Kannada language and script is supported in this language
environment."))
 '("Indian"))
| 122 | |
;; Language environment "Malayalam", registered with "Indian" as its
;; parent.  Text is handled as Unicode / UTF-8; the default input
;; method is "malayalam-itrans".
(set-language-info-alist
 "Malayalam"
 '((charset . (unicode))
   (coding-system . (utf-8))
   (coding-priority . (utf-8))
   (input-method . "malayalam-itrans")
   (documentation
    . "South Indian language Malayalam is supported in this language environment."))
 '("Indian"))
| 131 | |
(defconst devanagari-composable-pattern
  ;; The consonant-plus-virama piece occurs four times in the cluster
  ;; prefix, so it is spelled once here and reused.
  (let* ((dead-consonant "[क-हक़-य़]्")
         (optional-dead-consonant (concat "\\(?:" dead-consonant "\\)?")))
    (concat
     ;; Group 1: a vowel letter with an optional sign; or, outside any
     ;; group, a lone visarga or danda.
     "\\([अ-औॠॡ][ँं]?\\)\\|[ः।]"
     ;; Group 2: a consonant cluster.
     "\\|\\("
     ;; Optional prefix of one to four consonant+virama pairs.
     "\\(?:"
     optional-dead-consonant
     optional-dead-consonant
     optional-dead-consonant
     dead-consonant
     "\\)?"
     ;; Base consonant, then either a virama or optional trailing signs.
     "[क-हक़-य़]\\(?:्\\|[ा-्ॢॣ]?[ंँ]?\\)?"
     "\\)"))
  "Regexp matching a composable sequence of Devanagari characters.")
| 140 | |
(defconst tamil-composable-pattern
  (concat
   ;; Group 1: a single vowel letter.
   "\\([அ-ஔ]\\)\\|"
   ;; Vowel modifier considered independent (matched outside any group).
   "[ஂஃ]\\|"
   ;; Group 2: a consonant cluster.
   "\\("
   ;; The four-character sequence ஷ்ரீ has to be tried BEFORE the
   ;; generic consonant branch.  Regexp alternation takes the first
   ;; branch that matches, and the generic branch matches the leading
   ;; ஷ் on its own; when ஷ்ரீ was listed last it could never be
   ;; selected.  It now lives inside group 2, so groups 1 and 2 keep
   ;; their numbers.
   "ஷ்ரீ\\|"
   ;; க்ஷ or a single consonant, optionally followed by one sign.
   "\\(?:\\(?:க்ஷ\\)\\|[க-ஹ]\\)[்ா-ௌ]?"
   "\\)")
  "Regexp matching a composable sequence of Tamil characters.")
| 148 | |
(defconst kannada-composable-pattern
  ;; The consonant-plus-virama piece occurs four times in the cluster
  ;; prefix, so it is spelled once here and reused.
  (let* ((dead-consonant "[ಕ-ಹ]್")
         (optional-dead-consonant (concat "\\(?:" dead-consonant "\\)?")))
    (concat
     ;; Group 1: one character from the leading class; or, outside any
     ;; group, a lone visarga.
     "\\([ಂ-ಔೠಌ]\\)\\|[ಃ]"
     ;; Group 2: a consonant cluster.
     "\\|\\("
     ;; Optional prefix of one to four consonant+virama pairs.
     "\\(?:"
     optional-dead-consonant
     optional-dead-consonant
     optional-dead-consonant
     dead-consonant
     "\\)?"
     ;; Base consonant, then either a virama or one optional sign.
     "[ಕ-ಹ]\\(?:್\\|[ಾ-್ೕೃ]?\\)?"
     "\\)"))
  "Regexp matching a composable sequence of Kannada characters.")
| 157 | |
(defconst malayalam-composable-pattern
  ;; The consonant-plus-virama piece occurs four times in the cluster
  ;; prefix, so it is spelled once here and reused.
  (let* ((dead-consonant "[ക-ഹ]്")
         (optional-dead-consonant (concat "\\(?:" dead-consonant "\\)?")))
    (concat
     ;; Group 1: a vowel letter with an optional sign; or, outside any
     ;; group, a lone visarga.
     "\\([അ-ഔ][ം]?\\)\\|ഃ"
     ;; Group 2: a consonant cluster.
     "\\|\\("
     ;; Optional prefix of one to four consonant+virama pairs.
     "\\(?:"
     optional-dead-consonant
     optional-dead-consonant
     optional-dead-consonant
     dead-consonant
     "\\)?"
     ;; Base consonant, then either a virama or optional trailing signs.
     "[ക-ഹ]\\(?:്\\|[ാ-ൃെേൈൊൊോൌ]?[ം്]?\\)?"
     "\\)"))
  "Regexp matching a composable sequence of Malayalam characters.")
| 166 | |
;; Install a composition rule for every entry of `char-script-table'
;; whose script is one of the Indian scripts listed below.  Each rule
;; is a one-element list holding the vector
;; [REGEXP 0 font-shape-gstring].
(let ((composable-regexps
       (list (cons 'devanagari devanagari-composable-pattern)
             (cons 'bengali "[\x980-\x9FF\x200C\x200D]+")
             (cons 'gurmukhi "[\xA00-\xA7F\x200C\x200D]+")
             (cons 'gujarati "[\xA80-\xAFF\x200C\x200D]+")
             (cons 'oriya "[\xB00-\xB7F\x200C\x200D]+")
             (cons 'tamil tamil-composable-pattern)
             (cons 'telugu "[\xC00-\xC7F\x200C\x200D]+")
             (cons 'kannada kannada-composable-pattern)
             (cons 'malayalam malayalam-composable-pattern))))
  (map-char-table
   (lambda (chars script)
     ;; CHARS is the key `map-char-table' hands over for the current
     ;; entry; SCRIPT is the value stored for it.
     (let ((entry (assq script composable-regexps)))
       (when entry
         (set-char-table-range
          composition-function-table chars
          (list (vector (cdr entry) 0 'font-shape-gstring))))))
   char-script-table))

(provide 'indian)
| 187 | |
| 188 | ;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f |
| 189 | ;;; indian.el ends here |