Commit | Line | Data |
---|---|---|
4b725a70 | 1 | ;;; ind-util.el --- Transliteration and Misc. Tools for Indian Languages -*- coding: utf-8-emacs; -*- |
cd4e9344 | 2 | |
ab422c4d | 3 | ;; Copyright (C) 2001-2013 Free Software Foundation, Inc. |
cd4e9344 | 4 | |
c7357293 | 5 | ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> |
cd4e9344 RS |
6 | ;; Keywords: multilingual, Indian, Devanagari |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
4936186e | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
cd4e9344 | 11 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
cd4e9344 RS |
14 | |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
cd4e9344 RS |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; This file provides conversion between UCS and various | |
26 | ;; transliteration schemes, such as ITRANS, kyoto-harvard and aiba | |
27 | ;; methods. It also provides conversion between IS 13194 and UCS. | |
28 | ;; Finally, this program provides the compatibility support with | |
29 | ;; old implementation of Devanagari script. | |
30 | ||
31 | ;;; Code: | |
32 | ||
33 | ;;; Transliteration | |
34 | ||
35 | ;; The following tables provide the various transliteration schemes (such as | |
36 | ;; ITRANS, kyoto-harvard, and Aiba) of Indian scripts. They are also | |
37 | ;; used in quail/indian.el for typing Indian script in Emacs. | |
38 | ||
39 | (eval-and-compile | |
cd4e9344 | 40 | |
(defun indian-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression matching any key of HASHTBL.
The keys are collected with `maphash' and handed to `regexp-opt',
so they must all be strings."
  (let ((key-list nil))
    (maphash (lambda (k _v) (setq key-list (cons k key-list)))
             hashtbl)
    (regexp-opt key-list)))
cd4e9344 RS |
46 | |
47 | (defvar indian-dev-base-table | |
48 | '( | |
49 | (;; VOWELS (18) | |
4b725a70 PE |
50 | (?अ nil) (?आ ?ा) (?इ ?ि) (?ई ?ी) (?उ ?ु) (?ऊ ?ू) |
51 | (?ऋ ?ृ) (?ऌ ?ॢ) (?ऍ ?ॅ) (?ऎ ?ॆ) (?ए ?े) (?ऐ ?ै) | |
52 | (?ऑ ?ॉ) (?ऒ ?ॊ) (?ओ ?ो) (?औ ?ौ) (?ॠ ?ॄ) (?ॡ ?ॣ)) | |
cd4e9344 | 53 | (;; CONSONANTS (currently 42, including special cases) |
4b725a70 PE |
54 | ?क ?ख ?ग ?घ ?ङ ;; GUTTURALS |
55 | ?च ?छ ?ज ?झ ?ञ ;; PALATALS | |
56 | ?ट ?ठ ?ड ?ढ ?ण ;; CEREBRALS | |
57 | ?त ?थ ?द ?ध ?न ?ऩ ;; DENTALS | |
58 | ?प ?फ ?ब ?भ ?म ;; LABIALS | |
59 | ?य ?र ?ऱ ?ल ?ळ ?ऴ ?व ;; SEMIVOWELS | |
60 | ?श ?ष ?स ?ह ;; SIBILANTS | |
61 | ?क़ ?ख़ ?ग़ ?ज़ ?ड़ ?ढ़ ?फ़ ?य़ ;; NUKTAS | |
62 | "ज्ञ" "क्ष") | |
a1506d29 | 63 | (;; Misc Symbols (7) |
4b725a70 | 64 | ?ँ ?ं ?ः ?ऽ ?् ?ॐ ?।) |
cd4e9344 | 65 | (;; Digits (10) |
4b725a70 | 66 | ?० ?१ ?२ ?३ ?४ ?५ ?६ ?७ ?८ ?९) |
cd4e9344 | 67 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 68 | "्र" "र्" "त्र" "श्र" "़"))) |
cd4e9344 | 69 | |
c7357293 KH |
70 | ;; Punjabi is also known as Gurmukhi. |
71 | (defvar indian-pnj-base-table | |
72 | '( | |
73 | (;; VOWELS | |
4b725a70 PE |
74 | (?ਅ nil) (?ਆ ?ਾ) (?ਇ ?ਿ) (?ਈ ?ੀ) (?ਉ ?ੁ) (?ਊ ?ੂ) |
75 | nil nil nil nil (?ਏ ?ੇ) (?ਐ ?ੈ) | |
76 | nil nil (?ਓ ?ੋ) (?ਔ ?ੌ) nil nil) | |
c7357293 | 77 | (;; CONSONANTS |
4b725a70 PE |
78 | ?ਕ ?ਖ ?ਗ ?ਘ ?ਙ ;; GUTTURALS |
79 | ?ਚ ?ਛ ?ਜ ?ਝ ?ਞ ;; PALATALS | |
80 | ?ਟ ?ਠ ?ਡ ?ਢ ?ਣ ;; CEREBRALS | |
81 | ?ਤ ?ਥ ?ਦ ?ਧ ?ਨ nil ;; DENTALS | |
82 | ?ਪ ?ਫ ?ਬ ?ਭ ?ਮ ;; LABIALS | |
83 | ?ਯ ?ਰ nil ?ਲ ?ਲ਼ nil ?ਵ ;; SEMIVOWELS | |
84 | ?ਸ਼ nil ?ਸ ?ਹ ;; SIBILANTS | |
85 | nil ?ਖ਼ ?ਗ਼ ?ਜ਼ ?ੜ nil ?ਫ਼ nil ;; NUKTAS | |
86 | "ਜ੍ਞ" nil) | |
c7357293 | 87 | (;; Misc Symbols (7) |
4b725a70 | 88 | nil ?ਂ nil nil ?੍ nil nil) ;; ek onkar, etc. |
c7357293 | 89 | (;; Digits |
4b725a70 | 90 | ?੦ ?੧ ?੨ ?੩ ?੪ ?੫ ?੬ ?੭ ?੮ ?੯) |
c7357293 | 91 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 92 | "੍ਰ" "ਰ੍" "ਤ੍ਰ" "ਸ਼੍ਰ" "਼"))) |
c7357293 KH |
93 | |
94 | (defvar indian-gjr-base-table | |
95 | '( | |
96 | (;; VOWELS | |
4b725a70 PE |
97 | (?અ nil) (?આ ?ા) (?ઇ ?િ) (?ઈ ?ી) (?ઉ ?ુ) (?ઊ ?ૂ) |
98 | (?ઋ ?ૃ) nil (?ઍ ?ૅ) nil (?એ ?ે) (?ઐ ?ૈ) | |
99 | (?ઑ ?ૉ) nil (?ઓ ?ો) (?ઔ ?ૌ) (?ૠ ?ૄ) nil) | |
c7357293 | 100 | (;; CONSONANTS |
4b725a70 PE |
101 | ?ક ?ખ ?ગ ?ઘ ?ઙ ;; GUTTURALS |
102 | ?ચ ?છ ?જ ?ઝ ?ઞ ;; PALATALS | |
103 | ?ટ ?ઠ ?ડ ?ઢ ?ણ ;; CEREBRALS | |
104 | ?ત ?થ ?દ ?ધ ?ન nil ;; DENTALS | |
105 | ?પ ?ફ ?બ ?ભ ?મ ;; LABIALS | |
106 | ?ય ?ર nil ?લ ?ળ nil ?વ ;; SEMIVOWELS | |
107 | ?શ ?ષ ?સ ?હ ;; SIBILANTS | |
c7357293 | 108 | nil nil nil nil nil nil nil nil ;; NUKTAS |
4b725a70 | 109 | "જ્ઞ" "ક્ષ") |
c7357293 | 110 | (;; Misc Symbols (7) |
4b725a70 | 111 | ?ઁ ?ં ?ઃ ?ઽ ?્ ?ૐ nil) |
c7357293 | 112 | (;; Digits |
4b725a70 | 113 | ?૦ ?૧ ?૨ ?૩ ?૪ ?૫ ?૬ ?૭ ?૮ ?૯) |
c7357293 | 114 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 115 | "્ર" "ર્" "ત્ર" "શ્ર" "઼"))) |
c7357293 KH |
116 | |
117 | (defvar indian-ori-base-table | |
118 | '( | |
119 | (;; VOWELS | |
4b725a70 PE |
120 | (?ଅ nil) (?ଆ ?ା) (?ଇ ?ି) (?ଈ ?ୀ) (?ଉ ?ୁ) (?ଊ ?ୂ) |
121 | (?ଋ ?ୃ) (?ଌ nil) nil nil (?ଏ ?େ) (?ଐ ?ୈ) | |
122 | nil nil (?ଓ ?ୋ) (?ଔ ?ୌ) (?ୠ nil) (?ୡ nil)) | |
c7357293 | 123 | (;; CONSONANTS |
4b725a70 PE |
124 | ?କ ?ଖ ?ଗ ?ଘ ?ଙ ;; GUTTURALS |
125 | ?ଚ ?ଛ ?ଜ ?ଝ ?ଞ ;; PALATALS | |
126 | ?ଟ ?ଠ ?ଡ ?ଢ ?ଣ ;; CEREBRALS | |
127 | ?ତ ?ଥ ?ଦ ?ଧ ?ନ nil ;; DENTALS | |
128 | ?ପ ?ଫ ?ବ ?ଭ ?ମ ;; LABIALS | |
129 | ?ଯ ?ର nil ?ଲ ?ଳ nil nil ;; SEMIVOWELS | |
130 | ?ଶ ?ଷ ?ସ ?ହ ;; SIBILANTS | |
131 | nil nil nil nil ?ଡ଼ ?ଢ଼ nil ?ୟ ;; NUKTAS | |
132 | "ଜ୍ଞ" "କ୍ଷ") | |
c7357293 | 133 | (;; Misc Symbols |
4b725a70 | 134 | ?ଁ ?ଂ ?ଃ ?ଽ ?୍ nil nil) |
c7357293 | 135 | (;; Digits |
4b725a70 | 136 | ?୦ ?୧ ?୨ ?୩ ?୪ ?୫ ?୬ ?୭ ?୮ ?୯) |
c7357293 | 137 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 138 | "୍ର" "ର୍" "ତ୍ର" "ଶ୍ର" "଼"))) |
c7357293 KH |
139 | |
140 | (defvar indian-bng-base-table | |
141 | '( | |
142 | (;; VOWELS | |
4b725a70 PE |
143 | (?অ nil) (?আ ?া) (?ই ?ি) (?ঈ ?ী) (?উ ?ু) (?ঊ ?ূ) |
144 | (?ঋ ?ৃ) (?ঌ ?ৢ) nil nil (?এ ?ে) (?ঐ ?ৈ) | |
145 | nil nil (?ও ?ো) (?ঔ ?ৌ) (?ৠ ?ৄ) (?ৡ ?ৣ)) | |
c7357293 | 146 | (;; CONSONANTS |
4b725a70 PE |
147 | ?ক ?খ ?গ ?ঘ ?ঙ ;; GUTTURALS |
148 | ?চ ?ছ ?জ ?ঝ ?ঞ ;; PALATALS | |
149 | ?ট ?ঠ ?ড ?ঢ ?ণ ;; CEREBRALS | |
150 | ?ত ?থ ?দ ?ধ ?ন nil ;; DENTALS | |
151 | ?প ?ফ ?ব ?ভ ?ম ;; LABIALS | |
152 | ?য ?র nil ?ল nil nil nil ;; SEMIVOWELS | |
153 | ?শ ?ষ ?স ?হ ;; SIBILANTS | |
154 | nil nil nil nil ?ড় ?ঢ় nil ?য় ;; NUKTAS | |
155 | "জ্ঞ" "ক্ষ") | |
c7357293 | 156 | (;; Misc Symbols |
4b725a70 | 157 | ?ঁ ?ং ?ঃ nil ?্ nil nil) |
c7357293 | 158 | (;; Digits |
4b725a70 | 159 | ?০ ?১ ?২ ?৩ ?৪ ?৫ ?৬ ?৭ ?৮ ?৯) |
c7357293 | 160 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 161 | "্র" "র্" "ত্র" "শ্র" "়"))) |
c7357293 KH |
162 | |
163 | (defvar indian-asm-base-table | |
164 | '( | |
165 | (;; VOWELS | |
4b725a70 PE |
166 | (?অ nil) (?আ ?া) (?ই ?ি) (?ঈ ?ী) (?উ ?ু) (?ঊ ?ূ) |
167 | (?ঋ ?ৃ) (?ঌ ?ৢ) nil nil (?এ ?ে) (?ঐ ?ৈ) | |
168 | nil nil (?ও ?ো) (?ঔ ?ৌ) (?ৠ ?ৄ) (?ৡ ?ৣ)) | |
c7357293 | 169 | (;; CONSONANTS |
4b725a70 PE |
170 | ?ক ?খ ?গ ?ঘ ?ঙ ;; GUTTURALS |
171 | ?চ ?ছ ?জ ?ঝ ?ঞ ;; PALATALS | |
172 | ?ট ?ঠ ?ড ?ঢ ?ণ ;; CEREBRALS | |
173 | ?ত ?থ ?দ ?ধ ?ন nil ;; DENTALS | |
174 | ?প ?ফ ?ব ?ভ ?ম ;; LABIALS | |
175 | ?য ?ৰ nil ?ল nil nil ?ৱ ;; SEMIVOWELS | |
176 | ?শ ?ষ ?স ?হ ;; SIBILANTS | |
177 | nil nil nil nil ?ড় ?ঢ় nil ?য় ;; NUKTAS | |
178 | "জ্ঞ" "ক্ষ") | |
c7357293 | 179 | (;; Misc Symbols |
4b725a70 | 180 | ?ঁ ?ং ?ঃ nil ?্ nil nil) |
c7357293 | 181 | (;; Digits |
4b725a70 | 182 | ?০ ?১ ?২ ?৩ ?৪ ?৫ ?৬ ?৭ ?৮ ?৯) |
c7357293 | 183 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 184 | "্ৰ" "ৰ্" "ত্ৰ" "শ্ৰ" "়"))) |
c7357293 KH |
185 | |
186 | (defvar indian-tlg-base-table | |
187 | '( | |
188 | (;; VOWELS | |
4b725a70 PE |
189 | (?అ nil) (?ఆ ?ా) (?ఇ ?ి) (?ఈ ?ీ) (?ఉ ?ు) (?ఊ ?ూ) |
190 | (?ఋ ?ృ) (?ఌ nil) nil (?ఏ ?ే) (?ఎ ?ె) (?ఐ ?ై) | |
191 | nil (?ఓ ?ో) (?ఒ ?ొ) (?ఔ ?ౌ) (?ౠ ?ౄ) (?ౡ nil)) | |
c7357293 | 192 | (;; CONSONANTS |
4b725a70 PE |
193 | ?క ?ఖ ?గ ?ఘ ?ఙ ;; GUTTURALS |
194 | ?చ ?ఛ ?జ ?ఝ ?ఞ ;; PALATALS | |
195 | ?ట ?ఠ ?డ ?ఢ ?ణ ;; CEREBRALS | |
196 | ?త ?థ ?ద ?ధ ?న nil ;; DENTALS | |
197 | ?ప ?ఫ ?బ ?భ ?మ ;; LABIALS | |
198 | ?య ?ర ?ఱ ?ల ?ళ nil ?వ ;; SEMIVOWELS | |
199 | ?శ ?ష ?స ?హ ;; SIBILANTS | |
c7357293 | 200 | nil nil nil nil nil nil nil nil ;; NUKTAS |
4b725a70 | 201 | "జ్ఞ" "క్ష") |
c7357293 | 202 | (;; Misc Symbols |
4b725a70 | 203 | ?ఁ ?ం ?ః nil ?్ nil nil) |
c7357293 | 204 | (;; Digits |
4b725a70 | 205 | ?౦ ?౧ ?౨ ?౩ ?౪ ?౫ ?౬ ?౭ ?౮ ?౯) |
c7357293 | 206 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 207 | "్ర" "ర్" "త్ర" "శ్ర" nil))) |
c7357293 KH |
208 | |
209 | (defvar indian-knd-base-table | |
210 | '( | |
211 | (;; VOWELS | |
4b725a70 PE |
212 | (?ಅ nil) (?ಆ ?ಾ) (?ಇ ?ಿ) (?ಈ ?ೀ) (?ಉ ?ು) (?ಊ ?ೂ) |
213 | (?ಋ ?ೃ) (?ಌ nil) nil (?ಏ ?ೇ) (?ಎ ?ೆ) (?ಐ ?ೈ) | |
214 | nil (?ಓ ?ೋ) (?ಒ ?ೊ) (?ಔ ?ೌ) (?ೠ ?ೄ) (?ೡ nil)) | |
c7357293 | 215 | (;; CONSONANTS |
4b725a70 PE |
216 | ?ಕ ?ಖ ?ಗ ?ಘ ?ಙ ;; GUTTURALS |
217 | ?ಚ ?ಛ ?ಜ ?ಝ ?ಞ ;; PALATALS | |
218 | ?ಟ ?ಠ ?ಡ ?ಢ ?ಣ ;; CEREBRALS | |
219 | ?ತ ?ಥ ?ದ ?ಧ ?ನ nil ;; DENTALS | |
220 | ?ಪ ?ಫ ?ಬ ?ಭ ?ಮ ;; LABIALS | |
221 | ?ಯ ?ರ ?ಱ ?ಲ ?ಳ nil ?ವ ;; SEMIVOWELS | |
222 | ?ಶ ?ಷ ?ಸ ?ಹ ;; SIBILANTS | |
223 | nil nil nil nil nil nil ?ೞ nil ;; NUKTAS | |
224 | "ಜ್ಞ" "ಕ್ಷ") | |
c7357293 | 225 | (;; Misc Symbols |
4b725a70 | 226 | nil ?ಂ ?ಃ nil ?್ nil nil) |
c7357293 | 227 | (;; Digits |
4b725a70 | 228 | ?೦ ?೧ ?೨ ?೩ ?೪ ?೫ ?೬ ?೭ ?೮ ?೯) |
c7357293 | 229 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 230 | "್ರ" "ರ್" "ತ್ರ" "ಶ್ರ" nil))) |
c7357293 KH |
231 | |
232 | (defvar indian-mlm-base-table | |
233 | '( | |
234 | (;; VOWELS | |
4b725a70 PE |
235 | (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ) |
236 | (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ) | |
237 | nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil) | |
c7357293 | 238 | (;; CONSONANTS |
4b725a70 PE |
239 | ?ക ?ഖ ?ഗ ?ഘ ?ങ ;; GUTTURALS |
240 | ?ച ?ഛ ?ജ ?ഝ ?ഞ ;; PALATALS | |
241 | ?ട ?ഠ ?ഡ ?ഢ ?ണ ;; CEREBRALS | |
242 | ?ത ?ഥ ?ദ ?ധ ?ന nil ;; DENTALS | |
243 | ?പ ?ഫ ?ബ ?ഭ ?മ ;; LABIALS | |
244 | ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ ;; SEMIVOWELS | |
245 | ?ശ ?ഷ ?സ ?ഹ ;; SIBILANTS | |
c7357293 | 246 | nil nil nil nil nil nil nil nil ;; NUKTAS |
4b725a70 | 247 | "ജ്ഞ" "ക്ഷ") |
c7357293 | 248 | (;; Misc Symbols |
4b725a70 | 249 | nil ?ം ?ഃ nil ?് nil nil) |
c7357293 | 250 | (;; Digits |
4b725a70 | 251 | ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯) |
c7357293 | 252 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 253 | "്ര" "ര്" "ത്ര" "ശ്ര" nil))) |
c7357293 KH |
254 | |
255 | (defvar indian-tml-base-table | |
256 | '( | |
257 | (;; VOWELS | |
4b725a70 PE |
258 | (?அ nil) (?ஆ ?ா) (?இ ?ி) (?ஈ ?ீ) (?உ ?ு) (?ஊ ?ூ) |
259 | nil nil nil (?ஏ ?ே) (?எ ?ெ) (?ஐ ?ை) | |
260 | nil (?ஓ ?ோ) (?ஒ ?ொ) (?ஔ ?ௌ) nil nil) | |
c7357293 | 261 | (;; CONSONANTS |
4b725a70 PE |
262 | ?க nil nil nil ?ங ;; GUTTURALS |
263 | ?ச nil ?ஜ nil ?ஞ ;; PALATALS | |
264 | ?ட nil nil nil ?ண ;; CEREBRALS | |
265 | ?த nil nil nil ?ந ?ன ;; DENTALS | |
266 | ?ப nil nil nil ?ம ;; LABIALS | |
267 | ?ய ?ர ?ற ?ல ?ள ?ழ ?வ ;; SEMIVOWELS | |
268 | nil ?ஷ ?ஸ ?ஹ ;; SIBILANTS | |
c7357293 | 269 | nil nil nil nil nil nil nil nil ;; NUKTAS |
4b725a70 | 270 | "ஜ்ஞ" "க்ஷ") |
c7357293 | 271 | (;; Misc Symbols |
4b725a70 | 272 | nil ?ஂ ?ஃ nil ?் nil nil) |
c7357293 | 273 | (;; Digits |
4b725a70 | 274 | ?௦ ?௧ ?௨ ?௩ ?௪ ?௫ ?௬ ?௭ ?௮ ?௯) |
c7357293 | 275 | (;; Inscript-extra (4) (#, $, ^, *, ]) |
4b725a70 | 276 | "்ர" "ர்" "த்ர" nil nil))) |
cd4e9344 RS |
277 | |
(defvar indian-base-table-to-language-alist
  '((indian-dev-base-table . "Devanagari")
    (indian-pnj-base-table . "Punjabi")
    ;; `indian-gjr-base-table' is defined alongside the other script
    ;; tables, so it gets an entry here as well.
    (indian-gjr-base-table . "Gujarati")
    (indian-ori-base-table . "Oriya")
    (indian-bng-base-table . "Bengali")
    (indian-asm-base-table . "Assamese")
    (indian-tlg-base-table . "Telugu")
    (indian-knd-base-table . "Kannada")
    (indian-mlm-base-table . "Malayalam")
    (indian-tml-base-table . "Tamil"))
  "Alist mapping each script base-table variable to its language name.
Each element is (TABLE-SYMBOL . LANGUAGE-NAME), where TABLE-SYMBOL is
the name of one of the `indian-*-base-table' variables.")
288 | ||
289 | (defvar indian-itrans-v5-table | |
290 | '(;; for encode/decode | |
291 | (;; vowels -- 18 | |
292 | "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U") | |
b057ab61 KH |
293 | ("RRi" "R^i") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai" |
294 | "o.c" "O" "o" "au" ("RRI" "R^I") ("LLI" "L^I")) | |
a1506d29 | 295 | (;; consonants -- 40 |
cd4e9344 RS |
296 | "k" "kh" "g" "gh" ("~N" "N^") |
297 | "ch" ("Ch" "chh") "j" "jh" ("~n" "JN") | |
298 | "T" "Th" "D" "Dh" "N" | |
a1506d29 | 299 | "t" "th" "d" "dh" "n" "nh" |
cd4e9344 RS |
300 | "p" "ph" "b" "bh" "m" |
301 | "y" "r" "rh" "l" ("L" "ld") nil ("v" "w") | |
302 | "sh" ("Sh" "shh") "s" "h" | |
303 | "q" "K" "G" ("J" "z") ".D" ".Dh" "f" ("Y" "yh") | |
304 | ("GY" "dny") "x") | |
305 | (;; misc -- 7 | |
306 | ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") ".."))) | |
307 | ||
dd7aa8dd KH |
308 | (defvar indian-itrans-v5-table-for-tamil |
309 | '(;; for encode/decode | |
310 | (;; vowels -- 18 | |
311 | "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U") | |
312 | ("RRi" "R^i") ("LLi" "L^i") (".c" "e.c") "E" "e" "ai" | |
313 | "o.c" "O" "o" "au" ("RRI" "R^I") ("LLI" "L^I")) | |
314 | (;; consonants -- 40 | |
315 | "k" "kh" "g" "gh" ("~N" "N^") | |
316 | "ch" ("Ch" "chh") "j" "jh" ("~n" "JN") | |
317 | "T" "Th" "D" "Dh" "N" | |
318 | "t" "th" "d" "dh" "n" "nh" | |
319 | "p" "ph" "b" "bh" "m" | |
320 | "y" "r" "rh" "l" ("L" "ld") ("J" "z") ("v" "w") | |
321 | "sh" ("Sh" "shh") "s" "h" | |
322 | "q" "K" "G" nil ".D" ".Dh" "f" ("Y" "yh") | |
323 | ("GY" "dny") "x") | |
324 | (;; misc -- 7 | |
325 | ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") ".."))) | |
326 | ||
cd4e9344 RS |
327 | (defvar indian-kyoto-harvard-table |
328 | '(;; for encode/decode | |
329 | (;; vowel | |
330 | "a" ("A" "aa") "i" ("I" "ii") "u" ("U" "uu") | |
331 | "R" ("L" "lR") nil nil "e" "ai" | |
332 | nil nil "o" "au" ("q" "RR" "Q") ("E" "LL" "lRR")) | |
333 | (;; consonant | |
334 | "k" "kh" "g" "gh" "G" | |
335 | "c" "ch" "j" "jh" "J" | |
336 | "T" "Th" "D" "Dh" "N" | |
337 | "t" "th" "d" "dh" "n" nil | |
338 | "p" "ph" "b" "bh" "m" | |
339 | "y" "r" nil "l" "L" nil "v" | |
340 | ("z" "Z") "S" "s" "h" | |
341 | nil nil nil nil nil nil nil nil | |
342 | nil nil) | |
343 | (;; misc | |
344 | nil "M" "H" "'" nil "." nil))) | |
345 | ||
346 | (defvar indian-harvard-table | |
347 | '(;; for encode/decode | |
348 | (;; vowel | |
349 | "a" ("A" "aa") "i" ("I" "ii") "u" ("U" "uu") | |
350 | "R" ("L" "lR") nil nil "e" "ai" | |
351 | nil nil "o" "au" ("RR" "q" "Q") ("LL" "E" "lRR")) | |
352 | (;; consonant | |
353 | "k" "kh" "g" "gh" "G" | |
354 | "c" "ch" "j" "jh" "J" | |
355 | "T" "Th" "D" "Dh" "N" | |
356 | "t" "th" "d" "dh" "n" nil | |
357 | "p" "ph" "b" "bh" "m" | |
358 | "y" "r" nil "l" "L" nil "v" | |
359 | ("z" "Z") "S" "s" "h" | |
360 | nil nil nil nil nil nil nil nil | |
361 | nil nil) | |
362 | (;; misc | |
363 | nil "M" "H" "'" nil "." nil))) | |
364 | ||
365 | (defvar indian-tokyo-table | |
366 | '(;; for encode/decode | |
367 | (;; vowel | |
368 | "a" ("A" "aa") "i" ("I" "ii") "u" ("U" "uu") | |
369 | "R" ("L" "lR") nil nil "e" "ai" | |
370 | nil nil "o" "au" ("Q" "RR" "q") ("E" "LL" "lRR")) | |
371 | (;; consonant | |
372 | "k" "kh" "g" "gh" "G" | |
373 | "c" "ch" "j" "jh" "J" | |
374 | "T" "Th" "D" "Dh" "N" | |
375 | "t" "th" "d" "dh" "n" nil | |
376 | "p" "ph" "b" "bh" "m" | |
377 | "y" "r" nil "l" "L" nil "v" | |
378 | ("Z" "z") "S" "s" "h" | |
379 | nil nil nil nil nil nil nil nil | |
380 | nil nil) | |
381 | (;; misc | |
382 | nil "M" "H" "'" nil "." nil))) | |
383 | ||
384 | (defvar indian-aiba-table | |
385 | '(;; for encode/decode | |
386 | (;; vowel | |
387 | "a" "aa" "i" "ii" "u" "uu" | |
388 | ".r" ".l" nil nil "e" "ai" | |
389 | nil nil "o" "au" "~r" "~l") | |
390 | (;; consonant | |
391 | "k" "kh" "g" "gh" "^n" | |
392 | "c" "ch" "j" "jh" "~n" | |
393 | ".t" ".th" ".d" ".dh" ".n" | |
394 | "t" "th" "d" "dh" "n" nil | |
395 | "p" "ph" "b" "bh" "m" | |
396 | "y" "r" nil "l" nil nil "v" | |
397 | "^s" ".s" "s" "h" | |
398 | nil nil nil nil nil nil nil nil | |
399 | nil nil) | |
400 | (;; misc | |
401 | nil ".m" ".h" "'" nil "." nil))) | |
402 | ||
dbd9624e KH |
(defun combinatorial (head &rest tail)
  "Return every combination of one element from HEAD and each list in TAIL.
Each combination is a list whose first element comes from HEAD and
whose remaining elements come from the lists in TAIL, in order.
The first list varies fastest in the result."
  (if (null tail)
      (mapcar #'list head)
    (let ((suffixes (apply #'combinatorial tail))
          (combos nil))
      (dolist (suffix suffixes)
        (dolist (item head)
          (push (cons item suffix) combos)))
      (nreverse combos))))
cd4e9344 RS |
409 | |
(defun indian--puthash-char (char trans-char hashtbls)
  "Record the pair CHAR / TRANS-CHAR in HASHTBLS.
CHAR is nil, a character or a string.  TRANS-CHAR is nil, a string
or a list of strings.  HASHTBLS is a cons (ENCODE . DECODE): ENCODE
maps CHAR (as a string) to the first transliteration, and DECODE maps
every transliteration back to CHAR.  Do nothing when either CHAR or
TRANS-CHAR is nil."
  (when (and char trans-char)
    (let ((key (if (characterp char) (char-to-string char) char))
          (names (if (stringp trans-char) (list trans-char) trans-char))
          (encode-tbl (car hashtbls))   ; char -> trans
          (decode-tbl (cdr hashtbls)))  ; trans -> char
      (puthash key (car names) encode-tbl)
      (dolist (name names)
        (puthash name key decode-tbl)))))
cd4e9344 | 422 | |
e6432b68 DL |
(defun indian--map (f l1 l2)
  "Call F on corresponding elements of the lists L1 and L2.
Iteration stops at the end of L1; once L2 is exhausted, nil is
passed as the second argument.  Return nil."
  (while l1
    (let ((first (car l1))
          (second (car l2)))
      ;; Advance both lists before calling F.
      (setq l1 (cdr l1)
            l2 (cdr l2))
      (funcall f first second))))
426 | ||
(defun indian--puthash-v (v trans-v hashtbls)
  "Register the vowels in V with the transliterations in TRANS-V.
V is a list whose elements are nil or lists; only the first element
of each entry is registered.  TRANS-V is the parallel list of
transliterations.  HASHTBLS is the cons of hash tables passed on to
`indian--puthash-char'."
  (let ((entries v)
        (translits trans-v))
    (while entries
      (indian--puthash-char (car (car entries)) (car translits) hashtbls)
      (setq entries (cdr entries)
            translits (cdr translits)))))
432 | ||
(defun indian--puthash-c (c trans-c halant hashtbls)
  "Register each consonant in C, followed by HALANT, in HASHTBLS.
C is a list of consonants (characters, strings or nil) and TRANS-C
the parallel list of transliterations.  HALANT is a string appended
to every consonant before it is registered with
`indian--puthash-char'."
  (indian--map
   (lambda (c trans-c)
     ;; A nil slot means the script lacks this consonant.  Without this
     ;; guard, (concat nil halant) yields the bare HALANT, which would
     ;; then be registered under the consonant's transliteration.
     (when c
       (if (characterp c) (setq c (char-to-string c)))
       (indian--puthash-char (concat c halant) trans-c hashtbls)))
   c trans-c))
439 | ||
(defun indian--puthash-m (m trans-m hashtbls)
  "Register the symbols in M with the transliterations in TRANS-M.
M and TRANS-M are parallel lists; each pair is passed unchanged to
`indian--puthash-char' together with HASHTBLS."
  (let ((symbols m)
        (translits trans-m))
    (while symbols
      (indian--puthash-char (car symbols) (car translits) hashtbls)
      (setq symbols (cdr symbols)
            translits (cdr translits)))))
445 | ||
(defun indian--puthash-cv (c trans-c v trans-v hashtbls)
  "Register every consonant+vowel combination of C and V in HASHTBLS.
C and TRANS-C are parallel lists of consonants and transliterations;
V and TRANS-V are parallel lists of vowel entries and transliterations.
For each pair where all four items are non-nil, the consonant is
concatenated with the second element of the vowel entry (or with the
empty string when that element is not a character), and registered
under every concatenation of a consonant and a vowel transliteration."
  (indian--map
   (lambda (consonant consonant-trans)
     (indian--map
      (lambda (vowel vowel-trans)
        (when (and consonant consonant-trans vowel vowel-trans)
          (let ((base (if (characterp consonant)
                          (char-to-string consonant)
                        consonant))
                (matra (if (characterp (cadr vowel))
                           (char-to-string (cadr vowel))
                         ""))
                (c-names (if (stringp consonant-trans)
                             (list consonant-trans)
                           consonant-trans))
                (v-names (if (stringp vowel-trans)
                             (list vowel-trans)
                           vowel-trans)))
            (indian--puthash-char
             (concat base matra)
             (mapcar (lambda (parts) (apply #'concat parts))
                     (combinatorial c-names v-names))
             hashtbls))))
      v trans-v))
   c trans-c))
463 | ||
(defun indian-make-hash (table trans-table)
  "Build the transliteration hash tables for TABLE and TRANS-TABLE.
TABLE is a script base table whose first four elements are the
vowels, consonants, miscellaneous symbols and digits; the fifth
miscellaneous symbol is used as the halant.  TRANS-TABLE supplies the
transliterations for vowels, consonants and miscellaneous symbols, in
that order; digits are always paired with \"0\" through \"9\".
Return a cons (ENCODE . DECODE) of hash tables using `equal'."
  (let* ((tables (cons (make-hash-table :test 'equal)
                       (make-hash-table :test 'equal)))
         (vowel-list (nth 0 table))
         (consonant-list (nth 1 table))
         (misc-list (nth 2 table))
         (digit-list (nth 3 table))
         (halant-str (char-to-string (nth 4 misc-list)))
         (vowel-names (nth 0 trans-table))
         (consonant-names (nth 1 trans-table))
         (misc-names (nth 2 trans-table))
         (digit-names '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9")))
    ;; Registration order matters: later `puthash' calls overwrite
    ;; earlier entries that share a key.
    (indian--puthash-v vowel-list vowel-names tables)
    (indian--puthash-c consonant-list consonant-names halant-str tables)
    (indian--puthash-cv consonant-list consonant-names
                        vowel-list vowel-names tables)
    (indian--puthash-m misc-list misc-names tables)
    (indian--puthash-m digit-list digit-names tables)
    tables))
485 | ||
;; Per-script transliteration hashes.  Each value is the cons
;; (ENCODE . DECODE) returned by `indian-make-hash' for one script base
;; table and one transliteration table.  Devanagari gets ITRANS v5,
;; Kyoto-Harvard and Aiba hashes; every other script gets ITRANS v5
;; only, and Tamil uses `indian-itrans-v5-table-for-tamil'.
486 | (defvar indian-dev-itrans-v5-hash | |
487 | (indian-make-hash indian-dev-base-table | |
488 | indian-itrans-v5-table)) | |
489 | (defvar indian-dev-kyoto-harvard-hash | |
490 | (indian-make-hash indian-dev-base-table | |
491 | indian-kyoto-harvard-table)) | |
492 | (defvar indian-dev-aiba-hash | |
493 | (indian-make-hash indian-dev-base-table | |
494 | indian-aiba-table)) | |
495 | ||
c7357293 KH |
496 | (defvar indian-pnj-itrans-v5-hash |
497 | (indian-make-hash indian-pnj-base-table | |
498 | indian-itrans-v5-table)) | |
499 | ||
500 | (defvar indian-gjr-itrans-v5-hash | |
501 | (indian-make-hash indian-gjr-base-table | |
502 | indian-itrans-v5-table)) | |
503 | ||
504 | (defvar indian-ori-itrans-v5-hash | |
505 | (indian-make-hash indian-ori-base-table | |
506 | indian-itrans-v5-table)) | |
507 | ||
508 | (defvar indian-bng-itrans-v5-hash | |
509 | (indian-make-hash indian-bng-base-table | |
510 | indian-itrans-v5-table)) | |
511 | ||
512 | (defvar indian-asm-itrans-v5-hash | |
513 | (indian-make-hash indian-asm-base-table | |
514 | indian-itrans-v5-table)) | |
515 | ||
516 | (defvar indian-tlg-itrans-v5-hash | |
517 | (indian-make-hash indian-tlg-base-table | |
518 | indian-itrans-v5-table)) | |
519 | ||
520 | (defvar indian-knd-itrans-v5-hash | |
521 | (indian-make-hash indian-knd-base-table | |
522 | indian-itrans-v5-table)) | |
523 | ||
524 | (defvar indian-mlm-itrans-v5-hash | |
525 | (indian-make-hash indian-mlm-base-table | |
526 | indian-itrans-v5-table)) | |
527 | ||
528 | (defvar indian-tml-itrans-v5-hash | |
529 | (indian-make-hash indian-tml-base-table | |
dd7aa8dd | 530 | indian-itrans-v5-table-for-tamil)) |
cd4e9344 RS |
531 | ) |
532 | ||
(defmacro indian-translate-region (from to hashtable encode-p)
  "Transliterate the text between FROM and TO using HASHTABLE.
HASHTABLE is a variable whose value is a cons (ENCODE . DECODE) of
hash tables, as returned by `indian-make-hash'.  ENCODE-P non-nil
means use the ENCODE table, nil means use the DECODE table.

The regexp matching the table keys is computed once, at macro
expansion time.  Therefore HASHTABLE must already have its value when
the macro is expanded, and ENCODE-P should be a literal t or nil
because the expansion-time test sees the unevaluated form."
  `(save-excursion
     (save-restriction
       ;; FROM and TO are unquoted so that the macro arguments are used,
       ;; not whatever variables named `from' and `to' the caller has.
       ;; They are evaluated before the bindings below are established.
       (narrow-to-region ,from ,to)
       (let ((regexp ,(indian-regexp-of-hashtbl-keys
                       (if encode-p (car (eval hashtable))
                         (cdr (eval hashtable)))))
             ;; Keys such as "t" and "T" are distinct; a case-folding
             ;; search could match text that is not a key at all.
             (case-fold-search nil))
         (goto-char (point-min))
         (while (re-search-forward regexp nil t)
           (let ((matchstr (gethash (match-string 0)
                                    (if ,encode-p
                                        (car ,hashtable)
                                      (cdr ,hashtable)))))
             ;; Insert the replacement verbatim: no case conversion and
             ;; no special meaning for backslashes.
             (if matchstr (replace-match matchstr t t))))))))
547 | ||
548 | ;;; | |
549 | ||
;; Interactive command: replace the Devanagari text between FROM and TO
;; with its ITRANS v5 transliteration, using the encode (char -> trans)
;; table of `indian-dev-itrans-v5-hash'.
550 | (defun indian-dev-itrans-v5-encode-region (from to) | |
551 | (interactive "r") | |
a1506d29 | 552 | (indian-translate-region |
cd4e9344 RS |
553 | from to indian-dev-itrans-v5-hash t)) |
554 | ||
;; Interactive command: replace the ITRANS v5 text between FROM and TO
;; with Devanagari, using the decode (trans -> char) table of
;; `indian-dev-itrans-v5-hash'.
555 | (defun indian-dev-itrans-v5-decode-region (from to) | |
556 | (interactive "r") | |
557 | (indian-translate-region | |
558 | from to indian-dev-itrans-v5-hash nil)) | |
559 | ||
;; Interactive command: replace the Devanagari text between FROM and TO
;; with its Kyoto-Harvard transliteration, using the encode
;; (char -> trans) table of `indian-dev-kyoto-harvard-hash'.
560 | (defun indian-dev-kyoto-harvard-encode-region (from to) | |
561 | (interactive "r") | |
a1506d29 | 562 | (indian-translate-region |
cd4e9344 RS |
563 | from to indian-dev-kyoto-harvard-hash t)) |
564 | ||
;; Interactive command: replace the Kyoto-Harvard text between FROM and
;; TO with Devanagari, using the decode (trans -> char) table of
;; `indian-dev-kyoto-harvard-hash'.
565 | (defun indian-dev-kyoto-harvard-decode-region (from to) | |
566 | (interactive "r") | |
567 | (indian-translate-region | |
568 | from to indian-dev-kyoto-harvard-hash nil)) | |
569 | ||
;; Interactive command: replace the Devanagari text between FROM and TO
;; with its Aiba transliteration, using the encode (char -> trans)
;; table of `indian-dev-aiba-hash'.
570 | (defun indian-dev-aiba-encode-region (from to) | |
571 | (interactive "r") | |
a1506d29 | 572 | (indian-translate-region |
cd4e9344 RS |
573 | from to indian-dev-aiba-hash t)) |
574 | ||
;; Interactive command: replace the Aiba text between FROM and TO with
;; Devanagari, using the decode (trans -> char) table of
;; `indian-dev-aiba-hash'.
575 | (defun indian-dev-aiba-decode-region (from to) | |
576 | (interactive "r") | |
577 | (indian-translate-region | |
578 | from to indian-dev-aiba-hash nil)) | |
579 | ||
580 | ||
581 | ||
582 | ||
583 | ;;; IS 13194 utilities | |
584 | ||
585 | ;; The following provides conversion between IS 13194 (ISCII) and UCS. | |
586 | ||
3ff57641 | 587 | (let |
8f924df7 | 588 | ;;Unicode vs IS13194 ;; only Devanagari is supported now. |
3ff57641 DL |
589 | ((ucs-devanagari-to-is13194-alist |
590 | '((?\x0900 . "[U+0900]") | |
4b725a70 PE |
591 | (?\x0901 . "") |
592 | (?\x0902 . "") | |
593 | (?\x0903 . "") | |
3ff57641 | 594 | (?\x0904 . "[U+0904]") |
4b725a70 PE |
595 | (?\x0905 . "") |
596 | (?\x0906 . "") | |
597 | (?\x0907 . "") | |
598 | (?\x0908 . "") | |
599 | (?\x0909 . "") | |
600 | (?\x090a . "") | |
601 | (?\x090b . "") | |
602 | (?\x090c . "") | |
603 | (?\x090d . "") | |
604 | (?\x090e . "") | |
605 | (?\x090f . "") | |
606 | (?\x0910 . "") | |
607 | (?\x0911 . "") | |
608 | (?\x0912 . "") | |
609 | (?\x0913 . "") | |
610 | (?\x0914 . "") | |
611 | (?\x0915 . "") | |
612 | (?\x0916 . "") | |
613 | (?\x0917 . "") | |
614 | (?\x0918 . "") | |
615 | (?\x0919 . "") | |
616 | (?\x091a . "") | |
617 | (?\x091b . "") | |
618 | (?\x091c . "") | |
619 | (?\x091d . "") | |
620 | (?\x091e . "") | |
621 | (?\x091f . "") | |
622 | (?\x0920 . "") | |
623 | (?\x0921 . "") | |
624 | (?\x0922 . "") | |
625 | (?\x0923 . "") | |
626 | (?\x0924 . "") | |
627 | (?\x0925 . "") | |
628 | (?\x0926 . "") | |
629 | (?\x0927 . "") | |
630 | (?\x0928 . "") | |
631 | (?\x0929 . "") | |
632 | (?\x092a . "") | |
633 | (?\x092b . "") | |
634 | (?\x092c . "") | |
635 | (?\x092d . "") | |
636 | (?\x092e . "") | |
637 | (?\x092f . "") | |
638 | (?\x0930 . "") | |
639 | (?\x0931 . "") | |
640 | (?\x0932 . "") | |
641 | (?\x0933 . "") | |
642 | (?\x0934 . "") | |
643 | (?\x0935 . "") | |
644 | (?\x0936 . "") | |
645 | (?\x0937 . "") | |
646 | (?\x0938 . "") | |
647 | (?\x0939 . "") | |
3ff57641 DL |
648 | (?\x093a . "[U+093a]") |
649 | (?\x093b . "[U+093b]") | |
4b725a70 PE |
650 | (?\x093c . "") |
651 | (?\x093d . "") | |
652 | (?\x093e . "") | |
653 | (?\x093f . "") | |
654 | (?\x0940 . "") | |
655 | (?\x0941 . "") | |
656 | (?\x0942 . "") | |
657 | (?\x0943 . "") | |
658 | (?\x0944 . "") | |
659 | (?\x0945 . "") | |
660 | (?\x0946 . "") | |
661 | (?\x0947 . "") | |
662 | (?\x0948 . "") | |
663 | (?\x0949 . "") | |
664 | (?\x094a . "") | |
665 | (?\x094b . "") | |
666 | (?\x094c . "") | |
667 | (?\x094d . "") | |
3ff57641 DL |
668 | (?\x094e . "[U+094e]") |
669 | (?\x094f . "[U+094f]") | |
4b725a70 PE |
670 | (?\x0950 . "") |
671 | (?\x0951 . "") | |
672 | (?\x0952 . "") | |
3ff57641 DL |
673 | (?\x0953 . "[DEVANAGARI GRAVE ACCENT]") |
674 | (?\x0954 . "[DEVANAGARI ACUTE ACCENT]") | |
675 | (?\x0955 . "[U+0955]") | |
676 | (?\x0956 . "[U+0956]") | |
677 | (?\x0957 . "[U+0957]") | |
4b725a70 PE |
678 | (?\x0958 . "") |
679 | (?\x0959 . "") | |
680 | (?\x095a . "") | |
681 | (?\x095b . "") | |
682 | (?\x095c . "") | |
683 | (?\x095d . "") | |
684 | (?\x095e . "") | |
685 | (?\x095f . "") | |
686 | (?\x0960 . "") | |
687 | (?\x0961 . "") | |
688 | (?\x0962 . "") | |
689 | (?\x0963 . "") | |
690 | (?\x0964 . "") | |
691 | (?\x0965 . "") | |
692 | (?\x0966 . "") | |
693 | (?\x0967 . "") | |
694 | (?\x0968 . "") | |
695 | (?\x0969 . "") | |
696 | (?\x096a . "") | |
697 | (?\x096b . "") | |
698 | (?\x096c . "") | |
699 | (?\x096d . "") | |
700 | (?\x096e . "") | |
701 | (?\x096f . "") | |
3ff57641 DL |
702 | (?\x0970 . "[U+0970]") |
703 | (?\x0971 . "[U+0971]") | |
704 | (?\x0972 . "[U+0972]") | |
705 | (?\x0973 . "[U+0973]") | |
706 | (?\x0974 . "[U+0974]") | |
707 | (?\x0975 . "[U+0975]") | |
708 | (?\x0976 . "[U+0976]") | |
709 | (?\x0977 . "[U+0977]") | |
710 | (?\x0978 . "[U+0978]") | |
711 | (?\x0979 . "[U+0979]") | |
712 | (?\x097a . "[U+097a]") | |
713 | (?\x097b . "[U+097b]") | |
714 | (?\x097c . "[U+097c]") | |
715 | (?\x097d . "[U+097d]") | |
716 | (?\x097e . "[U+097e]") | |
717 | (?\x097f . "[U+097f]"))) | |
718 | (ucs-bengali-to-is13194-alist nil) | |
719 | (ucs-assamese-to-is13194-alist nil) | |
720 | (ucs-gurmukhi-to-is13194-alist nil) | |
721 | (ucs-gujarati-to-is13194-alist nil) | |
722 | (ucs-oriya-to-is13194-alist nil) | |
723 | (ucs-tamil-to-is13194-alist nil) | |
724 | (ucs-telugu-to-is13194-alist nil) | |
6b61353c KH |
725 | (ucs-malayalam-to-is13194-alist nil) |
726 | (ucs-kannada-to-is13194-alist nil)) | |
3ff57641 | 727 | (dolist (script '(devanagari bengali assamese gurmukhi gujarati |
6b61353c | 728 | oriya tamil telugu malayalam kannada)) |
f47a2e09 DL |
729 | (let ((hashtable (intern (concat "is13194-to-ucs-" |
730 | (symbol-name script) "-hashtbl" ))) | |
731 | (regexp (intern (concat "is13194-to-ucs-" | |
732 | (symbol-name script) "-regexp")))) | |
cd4e9344 | 733 | (set hashtable (make-hash-table :test 'equal :size 128)) |
3ff57641 DL |
734 | (dolist (x (eval (intern (concat "ucs-" (symbol-name script) |
735 | "-to-is13194-alist")))) | |
736 | (put-char-code-property (car x) 'script script) | |
737 | (put-char-code-property (car x) 'iscii (cdr x)) | |
738 | (puthash (cdr x) (char-to-string (car x)) (eval hashtable))) | |
8f924df7 KH |
739 | (set regexp (indian-regexp-of-hashtbl-keys (eval hashtable)))))) |
740 | ||
;; Script symbol used when no other repertory is selected.
;; `iscii-to-ucs-region' builds the names of the
;; `is13194-to-ucs-<script>-hashtbl' and `-regexp' variables from it.
741 | (defvar is13194-default-repertory 'devanagari) | |
cd4e9344 RS |
742 | |
;; List of (NAME CHAR SCRIPT) entries relating an IS 13194 repertory
;; name to a UCS script symbol.  DEF and RMN take the value that
;; `is13194-default-repertory' has when this form is evaluated, and ASM
;; shares the `bengali' script.
;; NOTE(review): CHAR is presumably the ISCII attribute code that
;; selects the repertory -- confirm against the IS 13194 standard.
743 | (defvar is13194-repertory-to-ucs-script | |
8f924df7 KH |
744 | `((DEF ?\x40 ,is13194-default-repertory) |
745 | (RMN ?\x41 ,is13194-default-repertory) | |
cd4e9344 RS |
746 | (DEV ?\x42 devanagari) |
747 | (BNG ?\x43 bengali) | |
748 | (TML ?\x44 tamil) | |
749 | (TLG ?\x45 telugu) | |
750 | (ASM ?\x46 bengali) | |
751 | (ORI ?\x47 oriya) | |
752 | (KND ?\x48 kannada) | |
753 | (MLM ?\x49 malayalam) | |
754 | (GJR ?\x4a gujarati) | |
755 | (PNJ ?\x4b gurmukhi))) | |
756 | ||
757 | ;; for guiding find-variable function. | |
;; These variables have already been assigned with `set' by the
;; `dolist' over scripts earlier in this file.  `defvar' does not
;; replace an existing value, so the forms below only declare the names.
758 | (defvar is13194-to-ucs-devanagari-hashtbl nil) | |
759 | (defvar is13194-to-ucs-devanagari-regexp nil) | |
760 | (defvar is13194-to-ucs-bengali-hashtbl nil) | |
761 | (defvar is13194-to-ucs-bengali-regexp nil) | |
762 | (defvar is13194-to-ucs-assamese-hashtbl nil) | |
763 | (defvar is13194-to-ucs-assamese-regexp nil) | |
764 | (defvar is13194-to-ucs-gurmukhi-hashtbl nil) | |
765 | (defvar is13194-to-ucs-gurmukhi-regexp nil) | |
766 | (defvar is13194-to-ucs-gujarati-hashtbl nil) | |
767 | (defvar is13194-to-ucs-gujarati-regexp nil) | |
768 | (defvar is13194-to-ucs-oriya-hashtbl nil) | |
769 | (defvar is13194-to-ucs-oriya-regexp nil) | |
770 | (defvar is13194-to-ucs-tamil-hashtbl nil) | |
771 | (defvar is13194-to-ucs-tamil-regexp nil) | |
772 | (defvar is13194-to-ucs-telugu-hashtbl nil) | |
773 | (defvar is13194-to-ucs-telugu-regexp nil) | |
774 | (defvar is13194-to-ucs-malayalam-hashtbl nil) | |
775 | (defvar is13194-to-ucs-malayalam-regexp nil) | |
6b61353c KH |
776 | (defvar is13194-to-ucs-kannada-hashtbl nil) |
777 | (defvar is13194-to-ucs-kannada-regexp nil) | |
cd4e9344 | 778 | |
cd4e9344 RS |
(defvar ucs-to-is13194-regexp
  ;; Only Devanagari is supported now: a character class spanning
  ;; U+0900 through U+097F.
  (string ?\[ #x0900 ?- #x097f ?\])
  "Regexp matching one UCS character to be converted to IS 13194.")
784 | ||
(defun ucs-to-iscii-region (from to)
  "Convert the Indian UCS characters between FROM and TO to ISCII.
Each character matched by `ucs-to-is13194-regexp' is replaced by the
value of its `iscii' char-code property.  Only Devanagari is
supported now.  Return the new end position."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward ucs-to-is13194-regexp nil t)
        (let ((iscii (get-char-code-property
                      (string-to-char (match-string 0)) 'iscii)))
          ;; A character without the property is left untouched rather
          ;; than making `replace-match' signal on a nil replacement.
          ;; The replacement is inserted verbatim (FIXEDCASE and LITERAL).
          (when iscii
            (replace-match iscii t t))))
      ;; The region is still narrowed here, so this is the new end.
      (point-max))))
800 | ||
(defun iscii-to-ucs-region (from to)
  "Convert the ISCII characters between FROM and TO to UCS.
The regexp and hash table are taken from the variables
`is13194-to-ucs-SCRIPT-regexp' and `is13194-to-ucs-SCRIPT-hashtbl',
where SCRIPT is the value of `is13194-default-repertory'.  A match
with no entry in the hash table is replaced by the empty string.
Only Devanagari is supported now.  Return the new end position."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let* ((prefix (concat "is13194-to-ucs-"
                             (symbol-name is13194-default-repertory)))
             ;; `symbol-value' reads the variables directly; no need to
             ;; go through `eval' for a plain symbol.
             (re (symbol-value (intern (concat prefix "-regexp"))))
             (hash (symbol-value (intern (concat prefix "-hashtbl")))))
        (while (re-search-forward re nil t)
          ;; Insert the replacement verbatim (FIXEDCASE and LITERAL).
          (replace-match (gethash (match-string 0) hash "") t t)))
      ;; The region is still narrowed here, so this is the new end.
      (point-max))))
822 | ||
823 | ;;;###autoload | |
(defun indian-compose-region (from to)
  "Compose the region according to `composition-function-table'.
Walk the text between FROM and TO.  For each character, look up its
entry in `composition-function-table'.  When the entry is a symbol
with a function definition, call it with the current position and
nil; if it returns an integer greater than that position, continue
from there, otherwise advance by one character.  Any other kind of
entry (nil, or a list of composition rules) is skipped."
  (interactive "r")
  (save-excursion
    (save-restriction
      (let ((pos from)
            (limit to)
            entry next)
        (narrow-to-region from to)
        (while (< pos limit)
          (setq entry (aref composition-function-table (char-after pos)))
          ;; `fboundp' signals `wrong-type-argument' for a non-symbol,
          ;; and table entries may be lists of rules rather than
          ;; function symbols, so check the type first.
          (if (and (symbolp entry) (fboundp entry))
              (setq next (funcall entry pos nil)
                    pos (if (and (integerp next) (> next pos))
                            next
                          (1+ pos)))
            (setq pos (1+ pos))))))))
cd4e9344 RS |
838 | |
839 | ;;;###autoload | |
(defun indian-compose-string (string)
  "Return STRING after composing it with `indian-compose-region'.
STRING is inserted into a temporary buffer, the whole buffer is
composed, and the buffer contents are returned as a new string."
  (with-temp-buffer
    (insert string)
    ;; The buffer was empty, so after the insertion point sits at its
    ;; end and (point-min)..(point) covers all of the inserted text.
    (indian-compose-region (point-min) (point))
    (buffer-string)))
845 | ||
846 | ;;;###autoload | |
(defun in-is13194-post-read-conversion (len)
  "Convert the LEN characters following point from ISCII to UCS.
The conversion is done by `iscii-to-ucs-region'.  Return the distance
from the starting position to the end position that function reports."
  (let* ((start (point))
         (end (iscii-to-ucs-region start (+ start len))))
    (- end start)))
851 | ||
852 | ;;;###autoload | |
(defun in-is13194-pre-write-conversion (from to)
  "Copy the text to be written into a new buffer and convert it to ISCII.
If FROM is a string, that string is the text; otherwise the text is
the part of the current buffer between FROM and TO.  The text is
inserted into a newly generated \" *temp*\" buffer, which is made
current and is still current on return, and is converted there with
`ucs-to-iscii-region'.  Return nil."
  (let ((source (current-buffer))
        (work (generate-new-buffer " *temp*")))
    ;; Deliberately not wrapped in `with-current-buffer': the work
    ;; buffer stays current after this function returns.
    (set-buffer work)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source from to)))
    (ucs-to-iscii-region (point-min) (point-max)))
  nil)
861 | ||
862 | ||
863 | ||
864 | ||
865 | ;;; Backward Compatibility support programs | |
866 | ||
f47a2e09 | 867 | ;; The following provides the conversion from old-implementation of |
cd4e9344 RS |
868 | ;; Emacs Devanagari script to UCS. |
869 | ||
870 | (defconst indian-2-colum-to-ucs | |
871 | '( | |
872 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f | |
4b725a70 PE |
873 | ;;2120 |
874 | ("" . "ँ") | |
875 | ("" . "ं") | |
876 | ("" . "ः") | |
877 | ("" . "अ") | |
878 | ("" . "आ") | |
879 | ("" . "इ") | |
880 | ("" . "ई") | |
881 | ("" . "उ") | |
882 | ("" . "ऊ") | |
883 | ("" . "ऋ") | |
884 | ("" . "रृ") | |
885 | ("" . "ऎ") | |
886 | ("" . "ए") | |
887 | ("" . "ऐ") | |
888 | ("" . "ऍ") | |
889 | ("" . "ऒ") | |
cd4e9344 | 890 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
891 | ;;2130 |
892 | ("" . "ओ") | |
893 | ("" . "औ") | |
894 | ("" . "ऑ") | |
895 | ("" . "क") | |
896 | ("" . "ख") | |
897 | ("" . "ग") | |
898 | ("" . "घ") | |
899 | ("" . "ङ") | |
900 | ("" . "च") | |
901 | ("" . "छ") | |
902 | ("" . "ज") | |
903 | ("" . "झ") | |
904 | ("" . "ञ") | |
905 | ("" . "ट") | |
906 | ("" . "ठ") | |
907 | ("" . "ड") | |
cd4e9344 | 908 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
909 | ;;2140 |
910 | ("" . "ढ") | |
911 | ("" . "ण") | |
912 | ("" . "त") | |
913 | ("" . "थ") | |
914 | ("" . "द") | |
915 | ("" . "ध") | |
916 | ("" . "न") | |
917 | ("" . "ऩ") | |
918 | ("" . "प") | |
919 | ("" . "फ") | |
920 | ("" . "ब") | |
921 | ("" . "भ") | |
922 | ("" . "म") | |
923 | ("" . "य") | |
924 | ("" . "य़") | |
925 | ("" . "र") | |
cd4e9344 | 926 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
927 | ;;2150 |
928 | ("" . "ऱ") | |
929 | ("" . "ल") | |
930 | ("" . "ळ") | |
931 | ("" . "ऴ") | |
932 | ("" . "व") | |
933 | ("" . "श") | |
934 | ("" . "ष") | |
935 | ("" . "स") | |
936 | ("" . "ह") | |
937 | ("" . "ा") | |
938 | ("" . "ि") | |
939 | ("" . "ी") | |
940 | ("" . "ु") | |
941 | ("" . "ू") | |
942 | ("" . "ृ") | |
cd4e9344 | 943 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
944 | ;;2160 |
945 | ("" . "ॆ") | |
946 | ("" . "े") | |
947 | ("" . "ै") | |
948 | ("" . "ॅ") | |
949 | ("" . "ॊ") | |
950 | ("" . "ो") | |
951 | ("" . "ौ") | |
952 | ("" . "ॉ") | |
953 | ("" . "्") | |
954 | ("" . "़") | |
955 | ("" . "।") | |
956 | ("" . "॥") | |
cd4e9344 | 957 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
958 | ;;2170 |
959 | ("" . "०") | |
960 | ("" . "१") | |
961 | ("" . "२") | |
962 | ("" . "३") | |
963 | ("" . "४") | |
964 | ("" . "५") | |
965 | ("" . "६") | |
966 | ("" . "७") | |
967 | ("" . "८") | |
968 | ("" . "९") | |
cd4e9344 | 969 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
970 | ;;2220 |
971 | ("" . "ज़्र") | |
972 | ("" . "फ़्र") | |
973 | ("" . "क्र") | |
974 | ("" . "ग्र") | |
975 | ("" . "त्र") | |
976 | ("" . "प्र") | |
977 | ("" . "फ्र") | |
978 | ("" . "श्र") | |
979 | ("" . "रु") | |
980 | ("" . "रू") | |
981 | ("" . "ऱु") | |
982 | ("" . "ऱू") | |
cd4e9344 | 983 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
984 | ;;2230 |
985 | ("" . "क्") | |
986 | ("" . "ख्") | |
987 | ("" . "ग्") | |
988 | ("" . "घ्") | |
989 | ("" . "च्") | |
990 | ("" . "च्र्") | |
991 | ("" . "ज्") | |
992 | ("" . "झ्") | |
993 | ("" . "ञ्") | |
994 | ("" . "ञ्") | |
cd4e9344 | 995 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
996 | ;;2240 |
997 | ("" . "ण्") | |
998 | ("" . "त्") | |
999 | ("" . "थ्") | |
1000 | ("" . "ध्") | |
1001 | ("" . "न्") | |
1002 | ("" . "ऩ्") | |
1003 | ("" . "प्") | |
1004 | ("" . "फ्") | |
1005 | ("" . "ब्") | |
1006 | ("" . "ब्") | |
1007 | ("" . "भ्") | |
1008 | ("" . "म्") | |
1009 | ("" . "य्") | |
1010 | ("" . "य़्") | |
cd4e9344 | 1011 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1012 | ;;2250 |
1013 | ("" . "ल्") | |
1014 | ("" . "ळ्") | |
1015 | ("" . "ऴ्") | |
1016 | ("" . "व्") | |
1017 | ("" . "श्") | |
1018 | ("" . "ष्") | |
1019 | ("" . "स्") | |
1020 | ("" . "्य") | |
cd4e9344 | 1021 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1022 | ;;2260 |
1023 | ("" . "ग्र्") | |
1024 | ("" . "घ्न्") | |
1025 | ("" . "त्त्") | |
1026 | ("" . "त्र्") | |
1027 | ("" . "ध्न्") | |
1028 | ("" . "ध्र्") | |
1029 | ("" . "प्त्") | |
1030 | ("" . "श्च्") | |
1031 | ("" . "श्र्") | |
1032 | ("" . "श्व्") | |
1033 | ("" . "न्न्") | |
1034 | ("" . "क्ष्") | |
1035 | ("" . "ज्ञ्") | |
cd4e9344 | 1036 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1037 | ;;2270 |
1038 | ("" . "र्") | |
1039 | ("" . "्र") | |
1040 | ("" . "्र") | |
1041 | ("" . "क़्") | |
1042 | ("" . "ख़्") | |
1043 | ("" . "ग़्") | |
1044 | ("" . "फ़्") | |
1045 | ("" . "ज़्") | |
cd4e9344 | 1046 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1047 | ;;2320 |
1048 | ("" . "ॐ") | |
1049 | ("" . "ऌ") | |
1050 | ("" . "रॄ") | |
1051 | ("" . "ॡ") | |
1052 | ("" . "रॣ") | |
1053 | ("" . "ॠ") | |
1054 | ("" . "रॢ") | |
cd4e9344 | 1055 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1056 | ;;2330 |
1057 | ("" . "क़") | |
1058 | ("" . "ख़") | |
1059 | ("" . "ग़") | |
1060 | ("" . "ज़") | |
1061 | ("" . "ड़") | |
cd4e9344 | 1062 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1063 | ;;2340 |
1064 | ("" . "ढ़") | |
1065 | ("" . "फ़") | |
1066 | ("" . "ऽ") | |
1067 | ("" . "ॄ") | |
1068 | ("" . "ॢ") | |
1069 | ("" . "ॣ") | |
cd4e9344 | 1070 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1071 | ;;2350 |
1072 | ("" . "म्न") | |
1073 | ("" . "म्ल") | |
1074 | ("" . "हृ") | |
cd4e9344 | 1075 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1076 | ;;2360 |
1077 | ("" . "ल्ल") | |
1078 | ("" . "व्न") | |
1079 | ("" . "व्व") | |
1080 | ("" . "श्च") | |
1081 | ("" . "श्न") | |
1082 | ("" . "श्ब") | |
1083 | ("" . "श्ल") | |
1084 | ("" . "श्व") | |
1085 | ("" . "ष्ट्र्य") | |
1086 | ("" . "ष्ट्य") | |
1087 | ("" . "ष्ट्व") | |
1088 | ("" . "ष्ट") | |
1089 | ("" . "ष्ठ") | |
1090 | ("" . "स्न") | |
1091 | ("" . "स्र") | |
cd4e9344 | 1092 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1093 | ;;2370 |
1094 | ("" . "ह्ण") | |
1095 | ("" . "ह्न") | |
1096 | ("" . "ह्म") | |
1097 | ("" . "ह्य") | |
1098 | ("" . "ह्र") | |
1099 | ("" . "ह्ल") | |
1100 | ("" . "ह्व") | |
cd4e9344 | 1101 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1102 | ;;2420 |
1103 | ("" . "क्त्र्य") | |
1104 | ("" . "क्त्व") | |
1105 | ("" . "क्त्य") | |
1106 | ("" . "क्न्य") | |
1107 | ("" . "क्र्य") | |
1108 | ("" . "क्व्य") | |
1109 | ("" . "क्क") | |
1110 | ("" . "क्त") | |
1111 | ("" . "क्न") | |
1112 | ("" . "क्म") | |
1113 | ("" . "क्य") | |
1114 | ("" . "क्ल") | |
1115 | ("" . "क्व") | |
1116 | ("" . "क्ष") | |
1117 | ("" . "घ्न") | |
cd4e9344 | 1118 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1119 | ;;2430 |
1120 | ("" . "ङ्क्त्य") | |
1121 | ("" . "ङ्क्ष्व") | |
1122 | ("" . "ङ्क्त") | |
1123 | ("" . "ङ्क्ष") | |
1124 | ("" . "ङ्घ्र") | |
1125 | ("" . "ङ्क्य") | |
1126 | ("" . "ङ्ख्य") | |
1127 | ("" . "ङ्ग्य") | |
1128 | ("" . "ङ्घ्य") | |
1129 | ("" . "ङ्क") | |
1130 | ("" . "ङ्ख") | |
1131 | ("" . "ङ्ग") | |
1132 | ("" . "ङ्घ") | |
1133 | ("" . "ङ्ङ") | |
1134 | ("" . "ङ्न") | |
1135 | ("" . "ङ्म") | |
cd4e9344 | 1136 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1137 | ;;2440 |
1138 | ("" . "ङ्य") | |
1139 | ("" . "च्च") | |
1140 | ("" . "च्ञ") | |
1141 | ("" . "छ्य") | |
1142 | ("" . "ज्र") | |
1143 | ("" . "ज्ञ") | |
1144 | ("" . "ञ्च") | |
1145 | ("" . "ञ्ज") | |
1146 | ("" . "ट्क") | |
1147 | ("" . "ट्ट") | |
1148 | ("" . "ट्ठ") | |
1149 | ("" . "ट्य") | |
1150 | ("" . "ठ्य") | |
1151 | ("" . "ड्ग्य") | |
1152 | ("" . "ड्घ्र") | |
1153 | ("" . "ड्र्य") | |
cd4e9344 | 1154 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1155 | ;;2450 |
1156 | ("" . "ड्ग") | |
1157 | ("" . "ड्घ") | |
1158 | ("" . "ड्ड") | |
1159 | ("" . "ड्म") | |
1160 | ("" . "ड्य") | |
cd4e9344 | 1161 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1162 | ;;2460 |
1163 | ("" . "ढ्य") | |
1164 | ("" . "त्त") | |
1165 | ("" . "त्न") | |
1166 | ("" . "द्द्य") | |
1167 | ("" . "द्ध्य") | |
1168 | ("" . "द्भ्य") | |
1169 | ("" . "द्र्य") | |
1170 | ("" . "द्व्य") | |
1171 | ("" . "द्ग्र") | |
1172 | ("" . "द्घ्र") | |
1173 | ("" . "द्द्व") | |
1174 | ("" . "द्ध्व") | |
1175 | ("" . "द्ग") | |
1176 | ("" . "द्घ") | |
1177 | ("" . "द्द") | |
1178 | ("" . "द्ध") | |
cd4e9344 | 1179 | ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f |
4b725a70 PE |
1180 | ;;2470 |
1181 | ("" . "द्न") | |
1182 | ("" . "द्ब") | |
1183 | ("" . "द्भ") | |
1184 | ("" . "द्म") | |
1185 | ("" . "द्य") | |
1186 | ("" . "द्व") | |
1187 | ("" . "ध्न") | |
1188 | ("" . "न्न") | |
1189 | ("" . "प्त") | |
1190 | ("" . "प्न") | |
1191 | ("" . "प्ल") | |
1192 | ("" . "ब्न") | |
1193 | ("" . "ब्ब") | |
1194 | ("" . "ब्व") | |
1195 | ("" . "भ्न"))) | |
cd4e9344 RS |
1196 | |
1197 | (defconst indian-2-column-to-ucs-regexp | |
4b725a70 | 1198 | "\\|\\|[]\\|[-]") |
cd4e9344 RS |
1199 | |
;; Char-tables of this subtype carry one extra slot; it is used below
;; to hold the entries whose key is not exactly one character long.
(put 'indian-2-column-to-ucs-chartable 'char-table-extra-slots 1)
(defconst indian-2-column-to-ucs-chartable
  (let ((table (make-char-table 'indian-2-column-to-ucs-chartable))
        (other-entries nil))
    (dolist (entry indian-2-colum-to-ucs)
      (let ((key (car entry)))
        (if (/= (length key) 1)
            ;; Keys of any other length cannot index the char-table;
            ;; collect those entries for the extra slot.
            (push entry other-entries)
          (aset table (aref key 0) (cdr entry)))))
    (set-char-table-extra-slot table 0 other-entries)
    table)
  "Char-table built from `indian-2-colum-to-ucs'.
An entry whose key is a single character is stored under that
character.  All other entries are kept as an alist in extra slot 0.")
1210 | ||
2ba14a8f | 1211 | ;;;###autoload |
cd4e9344 RS |
(defun indian-2-column-to-ucs-region (from to)
  "Convert old Emacs Devanagari characters between FROM and TO to UCS.
The region is decomposed, every match of
`indian-2-column-to-ucs-regexp' is replaced by its UCS equivalent,
and the result is composed again with `indian-compose-region'.
A one-character match is looked up in
`indian-2-column-to-ucs-chartable'; a longer match is looked up in
the alist stored in extra slot 0 of that table.  A match without a
known equivalent is replaced by \"?\"."
  (interactive "r")
  (save-excursion
    (save-restriction
      (let ((alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0)))
        (narrow-to-region from to)
        (decompose-region from to)
        (goto-char (point-min))
        (while (re-search-forward indian-2-column-to-ucs-regexp nil t)
          (let* ((beg (match-beginning 0))
                 (subst (if (= (- (match-end 0) beg) 1)
                            (aref indian-2-column-to-ucs-chartable
                                  (char-after beg))
                          (cdr (assoc (match-string 0) alist)))))
            ;; FIXEDCASE and LITERAL are both t so that the table
            ;; entry is inserted verbatim.
            (replace-match (or subst "?") t t)))
        (indian-compose-region (point-min) (point-max))))))
a1506d29 | 1231 | |
cd4e9344 | 1232 | (provide 'ind-util) |
a1506d29 | 1233 | |
cd4e9344 | 1234 | ;;; ind-util.el ends here |