Commit | Line | Data |
---|---|---|
1bec6fdb | 1 | ;;; indian.el --- Indian languages support -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
38141d20 | 3 | ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
d4877ac1 | 4 | ;; Free Software Foundation, Inc. |
38141d20 | 5 | ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
eaa61218 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
4ed46869 | 8 | |
2fc227cb | 9 | ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> |
51896ebc | 10 | ;; Keywords: multilingual, i18n, Indian |
4ed46869 KH |
11 | |
12 | ;; This file is part of GNU Emacs. | |
13 | ||
14 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
15 | ;; it under the terms of the GNU General Public License as published by | |
d7142f3e | 16 | ;; the Free Software Foundation; either version 3, or (at your option) |
4ed46869 KH |
17 | ;; any later version. |
18 | ||
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
e803d6bd | 25 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
3a35cf56 LK |
26 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
27 | ;; Boston, MA 02110-1301, USA. | |
4ed46869 KH |
28 | |
29 | ;;; Commentary: | |
30 | ||
1bec6fdb RS |
31 | ;; This file defines the in-is13194-devanagari coding system and the
32 | ;; relationship between the indian-glyph character set and the CDAC and AKRUTI fonts. | |
4ed46869 KH |
33 | |
34 | ;;; Code: | |
35 | ||
e1915ab3 KH |
;; Coding system for text that mixes ASCII with the `indian-is13194'
;; charset.  It is declared as an `iso-2022' type coding system.
(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  :coding-type 'iso-2022
  :mnemonic ?D
  ;; Initial designations: `ascii' and `indian-is13194' occupy the
  ;; first two slots; the remaining two slots are left nil.
  :designation [ascii indian-is13194 nil nil]
  :charset-list '(ascii indian-is13194)
  ;; Conversion hooks run after decoding and before encoding.  The two
  ;; functions are not defined in the portion of the file shown here.
  :post-read-conversion 'in-is13194-post-read-conversion
  :pre-write-conversion 'in-is13194-pre-write-conversion)

;; `devanagari' is a shorter name for the coding system defined above.
(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
8805e649 | 46 | |
098d86d5 KH |
(defvar indian-font-foundry 'cdac
  "Default font foundry used for Indian characters.
The foundries supported at present are `cdac' and `akruti'.")
50 | ||
(defvar indian-script-language-alist
  ;; SCRIPT      LANGUAGE-LIST        FONT-FOUNDRY
  '((devanagari  (hindi sanskrit)     nil)
    (bengali     (bengali assamese)   nil)
    (gurmukhi    (punjabi)            nil)
    (gujarati    (gujarati)           nil)
    (oriya       (oriya)              nil)
    (tamil       (tamil)              nil)
    (telugu      (telugu)             nil)
    (kannada     (kannada)            nil)
    (malayalam   (malayalam)          nil))
  "Alist mapping each Indian script to its languages and font foundry.
Every element looks like this:

  (SCRIPT LANGUAGE-LIST FONT-FOUNDRY)

SCRIPT is the name of an Indian script.

LANGUAGE-LIST lists the names of the Indian languages that SCRIPT
is used for, highest priority first.

FONT-FOUNDRY is a font foundry, i.e. a name standing for a group
of Indian fonts.  A nil value means that the value of
`indian-font-foundry' is used instead.")
098d86d5 KH |
74 | |
(defconst indian-font-char-index-table
  '(;; CDAC fonts                      used for
    (#x0000 . cdac:dv-ttsurekh)        ; hindi, etc
    (#x0100 . cdac:sd-ttsurekh)        ; sanskrit
    (#x0200 . cdac:bn-ttdurga)         ; bengali
    (#x0300 . cdac:tm-ttvalluvar)      ; tamil
    (#x0400 . cdac:tl-tthemalatha)     ; telugu
    (#x0500 . cdac:as-ttdurga)         ; assamese
    (#x0600 . cdac:or-ttsarala)        ; oriya
    (#x0700 . cdac:kn-ttuma)           ; kannada
    (#x0800 . cdac:ml-ttkarthika)      ; malayalam
    (#x0900 . cdac:gj-ttavantika)      ; gujarati
    (#x0A00 . cdac:pn-ttamar)          ; punjabi

    ;; AKRUTI fonts                    used for
    (#x0B00 . akruti:dev)              ; hindi, etc
    (#x0C00 . akruti:bng)              ; bengali
    (#x0D00 . akruti:pnj)              ; punjabi
    (#x0E00 . akruti:guj)              ; gujarati
    (#x0F00 . akruti:ori)              ; oriya
    (#x1000 . akruti:tml)              ; tamil
    (#x1100 . akruti:tlg)              ; telugu
    (#x1200 . akruti:knd)              ; kannada
    (#x1300 . akruti:mal))             ; malayalam
  "Alist relating `indian-glyph' character indices to Indian font identifiers.
Every element looks like this: (INDEX . FONT-IDENTIFIER)

INDEX is the index, within the charset `indian-glyph', of the
first character assigned to the glyphs of the font named by
FONT-IDENTIFIER.  Consecutive entries are #x100 apart.
FONT-IDENTIFIERs are defined at present for the CDAC and AKRUTI
font groups.")
f9c237ea | 108 | |
098d86d5 KH |
(defun indian-font-char (index font-identifier)
  "Return the `indian-glyph' character for glyph INDEX of FONT-IDENTIFIER.
INDEX is a glyph index from 0 to 255.  FONT-IDENTIFIER is one of
the font identifiers listed in `indian-font-char-index-table'; it
tells which font INDEX refers to.
Signal an error if INDEX is out of range or if FONT-IDENTIFIER is
not found in the table."
  (when (or (< index 0) (> index 255))
    (error "Invalid glyph index: %d" index))
  (let ((base (car (rassq font-identifier indian-font-char-index-table))))
    (unless base
      (error "Unknown font identifier: %s" font-identifier))
    ;; Turn the absolute glyph code into the two code points passed to
    ;; `make-char': quotient and remainder by 96, each shifted by 32.
    (let ((code (+ base index)))
      (make-char 'indian-glyph
                 (+ (/ code 96) 32)
                 (+ (% code 96) 32)))))
121 | ||
(defun indian-font-char-range (font-identifier)
  "Return the range of `indian-glyph' characters used by FONT-IDENTIFIER.
The value is a cons (MIN . MAX), where MIN and MAX are the
characters that `indian-font-char' returns for glyph indices 0
and 255 of the font FONT-IDENTIFIER."
  (let* ((lo (indian-font-char 0 font-identifier))
         (hi (indian-font-char 255 font-identifier)))
    (cons lo hi)))
f9c237ea | 129 | |
1bec6fdb RS |
(defvar indian-script-table
  [devanagari sanskrit bengali tamil telugu assamese
   oriya kannada malayalam gujarati punjabi]
  "Vector of Indian script names.
The order of the names matters: code that follows this definition
derives a per-script value from each name's position in the vector.")
145 | ||
;; Give every script symbol an `indian-glyph-code-offset' property:
;; the script at position N of `indian-script-table' gets 256 * N.
(dotimes (pos (length indian-script-table))
  (put (aref indian-script-table pos)
       'indian-glyph-code-offset
       (* 256 pos)))
151 | ||
(defvar indian-default-script 'devanagari
  "Script used by default for Indian languages.
Each Indian language environment sets this variable to one of the
names in `indian-script-table' (which see).
The value is `devanagari' unless changed.")
157 | ||
3719c14f KH |
(defvar indian-composable-pattern
  (make-char-table nil)
  "Char table of regexps matching composable Indian character sequences.
The table is created empty here.")
098d86d5 | 161 | |
9b87bff0 KH |
;; Install a composition rule for every character whose script has an
;; entry in the alist below.  Each rule pairs a regexp with the
;; function `font-shape-text'; the regexp matches a run of characters
;; from the script's own Unicode block, plus ZERO WIDTH NON-JOINER
;; (U+200C) and ZERO WIDTH JOINER (U+200D).
(let ((script-regexp-alist
       ;; (SCRIPT . REGEXP).  Every range covers exactly one
       ;; 128-code-point block, so a run never crosses into the
       ;; neighboring script (Devanagari is U+0900..U+097F; U+0980 and
       ;; up belongs to Bengali).
       '((devanagari . "[\x900-\x97F\x200C\x200D]+")
         (bengali . "[\x980-\x9FF\x200C\x200D]+")
         (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+")
         (gujarati . "[\xA80-\xAFF\x200C\x200D]+")
         (oriya . "[\xB00-\xB7F\x200C\x200D]+")
         (tamil . "[\xB80-\xBFF\x200C\x200D]+")
         (telugu . "[\xC00-\xC7F\x200C\x200D]+")
         (kannada . "[\xC80-\xCFF\x200C\x200D]+")
         (malayalam . "[\xD00-\xD7F\x200C\x200D]+")
         (sinhala . "[\xD80-\xDFF\x200C\x200D]+"))))
  ;; Walk `char-script-table': KEY is a character or character range,
  ;; VAL is the script assigned to it.  Ranges whose script is not
  ;; listed above are left alone.
  (map-char-table #'(lambda (key val)
                      (let ((slot (assq val script-regexp-alist)))
                        (if slot
                            (set-char-table-range
                             composition-function-table key
                             (list (cons (cdr slot) 'font-shape-text))))))
                  char-script-table))
180 | ||
181 | ||
41da80b1 | 182 | (provide 'indian) |
1bec6fdb | 183 | |
6b61353c | 184 | ;;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f |
4ed46869 | 185 | ;;; indian.el ends here |