| 1 | ;;; devan-util.el --- Support for composing Devanagari characters |
| 2 | |
| 3 | ;; Copyright (C) 2001 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp> |
| 6 | ;; Keywords: multilingual, Devanagari |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 13 | ;; any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 23 | ;; Boston, MA 02111-1307, USA. |
| 24 | |
| 25 | ;; Created: Feb. 17. 2001 |
| 26 | |
| 27 | ;;; Commentary: |
| 28 | |
| 29 | ;; This file provides character(Unicode) to glyph(CDAC) conversion and |
| 30 | ;; composition of Devanagari script characters. |
| 31 | |
| 32 | ;;; Code: |
| 33 | |
| 34 | ;;;###autoload |
| 35 | |
| 36 | ;; Devanagari Composable Pattern |
| 37 | ;; C .. Consonants |
| 38 | ;; V .. Vowel |
| 39 | ;; H .. Halant |
| 40 | ;; M .. Matra |
| 41 | ;; V .. Vowel |
| 42 | ;; A .. Anuswar |
| 43 | ;; D .. Chandrabindu |
| 44 | ;; (N .. Zerowidth Non Joiner) |
| 45 | ;; (J .. Zerowidth Joiner. ) |
| 46 | ;; 1. vowel |
| 47 | ;; V(A/D)? |
| 48 | ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) |
| 49 | ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? |
| 50 | |
;; Regexp character class (a string) matching one Devanagari consonant.
;; It lists two ranges; decoded, they appear to be U+0915..U+0939 and
;; U+0958..U+095F (NOTE(review): confirm against the file's coding system).
;; `devanagari-compose-syllable-region' matches it against each
;; translated character sequence in order to count consonants.
(defconst devanagari-consonant
  "[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]")
| 53 | |
(defconst devanagari-composable-pattern
  (let* ((consonant "[\e$,15U\e(B-\e$,15y68\e(B-\e$,16?\e(B]")
         (halant "\e$,16-\e(B")
         ;; One optional leading "consonant + halant" pair.
         (optional-pair (concat "\\(?:" consonant halant "\\)?")))
    (concat
     ;; Vowel with an optional sign, or one stand-alone character.
     "\\([\e$,15E\e(B-\e$,15T6@6A\e(B][\e$,15A5B\e(B]?\\)\\|\e$,15C\e(B"
     "\\|\\("
     ;; Up to four "consonant + halant" pairs in front of the base.
     "\\(?:" optional-pair optional-pair optional-pair consonant halant "\\)?"
     ;; Base consonant, then either a halant or optional matra and sign.
     consonant
     "\\(?:" halant "\\|[\e$,15~\e(B-\e$,16-6B6C\e(B]?[\e$,15B5A\e(B]?\\)?"
     "\\)"))
  "Regexp matching a composable sequence of Devanagari characters.
It matches a vowel with an optional trailing sign, a single
stand-alone character, or a syllable of up to five consonants joined
by halants and optionally followed by a halant or by a matra and a
sign.")
| 62 | |
(defun devanagari-compose-region (from to)
  "Compose every Devanagari syllable found between FROM and TO.
The region is searched for `devanagari-composable-pattern' and each
match is handed to `devanagari-compose-syllable-region'.  Point and
the current restriction are preserved.
When called interactively, operate on the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward devanagari-composable-pattern nil t)
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (devanagari-compose-syllable-region syllable-start
                                              syllable-end))))))
(defun devanagari-compose-string (string)
  "Return STRING with its Devanagari syllables composed.
STRING is first passed through `decompose-string', then inserted
into a temporary buffer where `devanagari-compose-region' is run over
the whole text; the resulting buffer contents are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (devanagari-compose-region (point-min) (point-max))
      (buffer-string))))
| 77 | |
(defun devanagari-post-read-conversion (len)
  "Compose the Devanagari text in the LEN characters following point.
The buffer's modified flag is restored to its previous value after
composing.  Point and the restriction are preserved.  Return the
size of the processed region."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (devanagari-compose-region (point-min) (point-max))
        ;; Composing must not mark a freshly read buffer as modified.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
| 86 | |
(defun devanagari-range (from to)
  "Return the ascending list of the integers FROM, FROM+1, ..., TO.
The result is nil when FROM is greater than TO."
  (let ((numbers nil)
        (current to))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= current from)
      (setq numbers (cons current numbers)
            current (1- current)))
    numbers))
| 91 | |
(defun devanagari-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches any key of hash table HASHTBL.
The keys are collected, ordered from the longest to the shortest and
passed to `regexp-opt'.  `max-specpdl-size' is bound to 1000 while
this function runs."
  (let ((max-specpdl-size 1000)
        (key-list nil))
    ;; Gather the keys; the stored values are not needed.
    (maphash (lambda (key val) (setq key-list (cons key key-list)))
             hashtbl)
    (regexp-opt
     (sort key-list
           (lambda (x y) (> (length x) (length y)))))))
| 101 | |
(defun devanagari-composition-function (from to pattern &optional string)
  "Compose Devanagari characters in REGION, or STRING if specified.
The region is delimited by FROM and TO.  It is assumed that the
region or STRING fully matches the composable PATTERN regexp;
PATTERN itself is not inspected here.  Return TO minus FROM.

NOTE(review): when STRING is given, the value returned by
`devanagari-compose-syllable-string' is discarded -- confirm that
this is intended."
  (cond (string
         (devanagari-compose-syllable-string string))
        (t
         (devanagari-compose-syllable-region from to)))
  (- to from))
| 109 | |
;; Register a function to compose Devanagari characters.
;; For #x0903 and every code in #x0905..#x0939 and #x0958..#x0961, the
;; `composition-function-table' entry is set to a one-element list pairing
;; `devanagari-composable-pattern' with `devanagari-composition-function'.
;; The code list is built with `append' so that the quoted constant
;; '(#x0903) is copied rather than destructively modified.
(mapc
 (function (lambda (ucs)
             (aset composition-function-table (decode-char 'ucs ucs)
                   (list (cons devanagari-composable-pattern
                               'devanagari-composition-function)))))
 (append '(#x0903)
         (devanagari-range #x0905 #x0939)
         (devanagari-range #x0958 #x0961)))
| 117 | |
| 118 | ;; Notes on conversion steps. |
| 119 | |
| 120 | ;; 1. chars to glyphs |
| 121 | ;; |
;; Rules are not applied to a halant that appears at the end of the
;; text.  Also, a preceding or following "r" is treated as a special case.
| 124 | |
| 125 | ;; 2. glyphs reordering. |
| 126 | ;; |
;; The glyphs are split at each halant, and each resulting glyph group
;; is re-ordered in the following order.
| 129 | ;; |
| 130 | ;; Note that `consonant-glyph' mentioned here does not contain the |
| 131 | ;; vertical bar (right modifier) attached at the right of the |
| 132 | ;; consonant. |
| 133 | ;; |
| 134 | ;; If the glyph-group contains right modifier, |
| 135 | ;; (1) consonant-glyphs/vowels, with nukta sign |
| 136 | ;; (2) spacing |
| 137 | ;; (3) right modifier (may be matra) |
| 138 | ;; (4) top matra |
| 139 | ;; (5) preceding "r" |
| 140 | ;; (6) anuswar |
| 141 | ;; (7) following "r" |
| 142 | ;; (8) bottom matra or halant. |
| 143 | ;; |
| 144 | ;; Otherwise, |
| 145 | ;; (1) consonant-glyph/vowels, with nukta sign |
| 146 | ;; (3) left matra |
| 147 | ;; (4) top matra |
| 148 | ;; (5) preceding "r" |
| 149 | ;; (6) anuswar |
| 150 | ;; (7) following "r" |
| 151 | ;; (8) bottom matra or halant. |
| 152 | ;; (2) spacing |
| 153 | |
| 154 | ;; 3. glyph to glyph |
| 155 | ;; |
;; For better display, the rendering of some glyphs is tuned.
| 157 | |
| 158 | ;; 4. Composition. |
| 159 | ;; |
| 160 | ;; left modifiers will be attached at the left. |
| 161 | ;; others will be attached right. |
| 162 | |
;; Problem:
;; Can we generalize these methods to other Indian scripts?
| 165 | |
;; Alist of (CHARS . GLYPHS) pairs.  CHARS is a string of one or more
;; Devanagari characters and GLYPHS is the string of font glyph
;; characters that replaces it.  The pairs are loaded into
;; `dev-char-glyph-hash', and `dev-char-glyph-regexp' is built from the
;; keys.  Entries that are commented out have no glyph mapping.
(defvar dev-char-glyph
  '(("\e$,15E\e(B" . "\e$,4 K\e(B")
    ("\e$,15F\e(B" . "\e$,4 K")\e(B")
    ("\e$,15~\e(B" . "\e$,4")\e(B")
    ("\e$,15G\e(B" . "\e$,4 \\e(B")
    ("\e$,15\7f\e(B" . "\e$,4"*\e(B")
    ("\e$,15\7f5A\e(B" . "\e$,4"*\e(B\\e$,4"&\e(B")
    ("\e$,15H\e(B" . "\e$,4 \"'\e(B")
    ("\e$,15H5A\e(B" . "\e$,4 \"'"&\e(B")
    ("\e$,16 \e(B" . "\e$,4"2\e(B")
    ("\e$,16 5A\e(B" . "\e$,4"2"&\e(B")
    ("\e$,15I\e(B" . "\e$,4 ]\e(B")
    ("\e$,16!\e(B" . "\e$,4"6\e(B")
    ("\e$,15J\e(B" . "\e$,4 ^"P\e(B")
    ("\e$,16"\e(B" . "\e$,4":\e(B")
    ("\e$,15K\e(B" . "\e$,4 `"Q\e(B")
    ("\e$,16#\e(B" . "\e$,4">\e(B")
    ;;("\e$,15L\e(B" . nil) ; not implemented.
    ("\e$,16$\e(B" . "\e$,4"?\e(B")
    ("\e$,15M\e(B" . "\e$,4 b"L\e(B")
    ("\e$,15M5A\e(B" . "\e$,4 b"$\e(B")
    ("\e$,15M5B\e(B" . "\e$,4 b"$\e(B")
    ("\e$,16%\e(B" . "\\e$,4"L\e(B")
    ("\e$,15N\e(B" . "\e$,4 b"@\e(B")
    ("\e$,15N5A\e(B" . "\e$,4 b"@"&\e(B")
    ("\e$,16&\e(B" . "\\e$,4"@\e(B")
    ("\e$,16&5A\e(B" . "\\e$,4"@\e(B\\e$,4"&\e(B")
    ("\e$,15O\e(B" . "\e$,4 b\e(B")
    ("\e$,16'\e(B" . "\\e$,4"D\e(B")
    ("\e$,16'5A\e(B" . "\\e$,4"D\e(B\\e$,4"&\e(B")
    ("\e$,15P\e(B" . "\e$,4 b"D\e(B")
    ("\e$,15P5A\e(B" . "\e$,4 b"D"&\e(B")
    ("\e$,16(\e(B" . "\\e$,4"H\e(B")
    ("\e$,16(5A\e(B" . "\\e$,4"H\e(B\\e$,4"&\e(B")
    ("\e$,15Q\e(B" . "\e$,4 K")"L\e(B") ;; special rule for reordering.
    ("\e$,15Q5A\e(B" . "\e$,4 K")"$\e(B")
    ("\e$,15Q5B\e(B" . "\e$,4 K")"$\e(B")
    ("\e$,16)\e(B" . "\\e$,4")"L\e(B")
    ("\e$,16)5A\e(B" . "\\e$,4")"$\e(B")
    ("\e$,16)5B\e(B" . "\\e$,4")"$\e(B")
    ("\e$,15R\e(B" . "\e$,4 K")"@\e(B")
    ("\e$,15R5A\e(B" . "\e$,4 K")"@"&\e(B")
    ("\e$,16*\e(B" . "\\e$,4")"@\e(B")
    ("\e$,16*5A\e(B" . "\\e$,4")"@"&\e(B")
    ("\e$,15S\e(B" . "\e$,4 K")"D\e(B")
    ("\e$,15S5A\e(B" . "\e$,4 K")"D"&\e(B")
    ("\e$,16+\e(B" . "\\e$,4")"D\e(B")
    ("\e$,16+5A\e(B" . "\\e$,4")"D"&\e(B")
    ("\e$,15T\e(B" . "\e$,4 K")"H\e(B")
    ("\e$,15T5A\e(B" . "\e$,4 K")"H"&\e(B")
    ("\e$,16,\e(B" . "\\e$,4")"H\e(B")
    ("\e$,16,5A\e(B" . "\\e$,4")"H"&\e(B")
    ("\e$,16@\e(B" . "\e$,4 a"Q\e(B")
    ;;("\e$,16B\e(B" . nil)
    ;;("\e$,16A\e(B" . nil)
    ;;("\e$,16C\e(B" . nil)

    ;; GUTTURALS
    ("\e$,15U\e(B" . "\e$,4 e"R\e(B")
    ("\e$,15U6-\e(B" . "\e$,4 c\e(B")
    ("\e$,15U6-5p\e(B" . "\e$,4 g"R\e(B")
    ("\e$,15U6-5d\e(B" . "\e$,4 h"R\e(B")
    ("\e$,15U6-5w\e(B" . "\e$,4 i")\e(B")
    ("\e$,15U6-5w6-\e(B" . "\e$,4 i\e(B")

    ("\e$,15V\e(B" . "\e$,4 j")\e(B")
    ("\e$,15V6-\e(B" . "\e$,4 j\e(B")
    ("\e$,15V6-5p\e(B" . "\e$,4 l")\e(B")
    ("\e$,15V6-5p6-\e(B" . "\e$,4 l\e(B")

    ("\e$,15W\e(B" . "\e$,4 m")\e(B")
    ("\e$,15W6-\e(B" . "\e$,4 m\e(B")
    ("\e$,15W6-5p\e(B" . "\e$,4 o")\e(B")
    ("\e$,15W6-5p6-\e(B" . "\e$,4 o\e(B")

    ("\e$,15X\e(B" . "\e$,4 p")\e(B")
    ("\e$,15X6-\e(B" . "\e$,4 p\e(B")
    ("\e$,15X6-5p\e(B" . "\e$,4 q")\e(B")
    ("\e$,15X6-5p6-\e(B" . "\e$,4 q\e(B")

    ("\e$,15Y\e(B" . "\e$,4 r"S\e(B")
    ;; PALATALS
    ("\e$,15Z\e(B" . "\e$,4 s")\e(B")
    ("\e$,15Z6-\e(B" . "\e$,4 s\e(B")
    ("\e$,15Z6-5p\e(B" . "\e$,4 t")\e(B")
    ("\e$,15Z6-5p6-\e(B" . "\e$,4 t\e(B")

    ("\e$,15[\e(B" . "\e$,4 u"T\e(B")

    ("\e$,15\\e(B" . "\e$,4 v")\e(B")
    ("\e$,15\6-\e(B" . "\e$,4 v\e(B")
    ("\e$,15\6-5p\e(B" . "\e$,4 x")\e(B")
    ("\e$,15\6-5p6-\e(B" . "\e$,4 x\e(B")
    ("\e$,15\6-5^\e(B" . "\e$,4 y")\e(B")
    ("\e$,15\6-5^6-\e(B" . "\e$,4 y\e(B")

    ("\e$,15]\e(B" . "\e$,4 z")\e(B")
    ("\e$,15]6-\e(B" . "\e$,4 z\e(B")
    ("\e$,15]6-5p\e(B" . "\e$,4 {")\e(B")
    ("\e$,15]6-5p6-\e(B" . "\e$,4 {\e(B")

    ("\e$,15^\e(B" . "\e$,4 |")\e(B")
    ("\e$,15^6-\e(B" . "\e$,4 |\e(B")
    ;; CEREBRALS
    ("\e$,15_\e(B" . "\e$,4 }"U\e(B")
    ("\e$,15_6-5_\e(B" . "\e$,4 ~"U\e(B")
    ("\e$,15_6-5`\e(B" . "\e$,4 \7f"U\e(B")

    ("\e$,15`\e(B" . "\e$,4! "V\e(B")
    ("\e$,15`6-5`\e(B" . "\e$,4!!"V\e(B")

    ("\e$,15a\e(B" . "\e$,4!""W\e(B")
    ("\e$,15a6-5a\e(B" . "\e$,4!$"W\e(B")
    ("\e$,15a6-5b\e(B" . "\e$,4!%"W\e(B")

    ("\e$,15b\e(B" . "\e$,4!&"X\e(B")

    ("\e$,15c\e(B" . "\e$,4!(")\e(B")
    ("\e$,15c6-\e(B" . "\e$,4!(\e(B")
    ;; DENTALS
    ("\e$,15d\e(B" . "\e$,4!)")\e(B")
    ("\e$,15d6-\e(B" . "\e$,4!)\e(B")
    ("\e$,15d6-5p\e(B" . "\e$,4!*")\e(B")
    ("\e$,15d6-5p6-\e(B" . "\e$,4!*\e(B")
    ("\e$,15d6-5d\e(B" . "\e$,4!+")\e(B")
    ("\e$,15d6-5d6-\e(B" . "\e$,4!+\e(B")

    ("\e$,15e\e(B" . "\e$,4!,")\e(B")
    ("\e$,15e6-\e(B" . "\e$,4!,\e(B")
    ("\e$,15e6-5p\e(B" . "\e$,4!-")\e(B")
    ("\e$,15e6-5p6-\e(B" . "\e$,4!-\e(B")

    ("\e$,15f\e(B" . "\e$,4!."Y\e(B")
    ("\e$,15f6#\e(B" . "\e$,4!/"Y\e(B")
    ("\e$,15f6-5p\e(B" . "\e$,4!0"Y\e(B")
    ("\e$,15f6-5f\e(B" . "\e$,4!1"Y\e(B")
    ("\e$,15f6-5g\e(B" . "\e$,4!2"Y\e(B")
    ("\e$,15f6-5n\e(B" . "\e$,4!3\e(B")
    ("\e$,15f6-5o\e(B" . "\e$,4!4\e(B")
    ("\e$,15f6-5u\e(B" . "\e$,4!5"Y\e(B")

    ("\e$,15g\e(B" . "\e$,4!6")\e(B")
    ("\e$,15g6-\e(B" . "\e$,4!6\e(B")
    ("\e$,15g6-5p\e(B" . "\e$,4!7")\e(B")
    ("\e$,15g6-5p6-\e(B" . "\e$,4!7\e(B")

    ("\e$,15h\e(B" . "\e$,4!8")\e(B")
    ("\e$,15h6-\e(B" . "\e$,4!8\e(B")
    ("\e$,15h6-5p\e(B" . "\e$,4!9")\e(B")
    ;; NOTE(review): every other key of this shape (ending in a second
    ;; halant) maps to a value without the trailing glyph that its
    ;; shorter sibling key has; this entry keeps it.  Possibly a
    ;; copy-and-paste slip -- confirm against the font.
    ("\e$,15h6-5p6-\e(B" . "\e$,4!9")\e(B")
    ("\e$,15h6-5h\e(B" . "\e$,4!:")\e(B")
    ("\e$,15h6-5h6-\e(B" . "\e$,4!:\e(B")

    ("\e$,15i\e(B" . "\e$,4!8"#")\e(B")
    ;; LABIALS
    ("\e$,15j\e(B" . "\e$,4!;")\e(B")
    ("\e$,15j6-\e(B" . "\e$,4!;\e(B")
    ("\e$,15j6-5p\e(B" . "\e$,4!<")\e(B")
    ("\e$,15j6-5p6-\e(B" . "\e$,4!<\e(B")

    ("\e$,15k\e(B" . "\e$,4!a"[\e(B")
    ("\e$,15k6-\e(B" . "\e$,4!=\e(B")
    ("\e$,15k6-5p\e(B" . "\e$,4!c"[\e(B")

    ("\e$,15l\e(B" . "\e$,4!d")\e(B")
    ("\e$,15l6-\e(B" . "\e$,4!d\e(B")
    ("\e$,15l6-5p\e(B" . "\e$,4!e")\e(B")
    ("\e$,15l6-5p6-\e(B" . "\e$,4!e\e(B")

    ("\e$,15m\e(B" . "\e$,4!f")\e(B")
    ("\e$,15m6-\e(B" . "\e$,4!f\e(B")
    ("\e$,15m6-5p\e(B" . "\e$,4!g")\e(B")
    ("\e$,15m6-5p6-\e(B" . "\e$,4!g\e(B")

    ("\e$,15n\e(B" . "\e$,4!h")\e(B")
    ("\e$,15n6-\e(B" . "\e$,4!h\e(B")
    ("\e$,15n6-5p\e(B" . "\e$,4!i")\e(B")
    ("\e$,15n6-5p6-\e(B" . "\e$,4!i\e(B")
    ;; SEMIVOWELS
    ("\e$,15o\e(B" . "\e$,4!j")\e(B")
    ("\e$,15o6-\e(B" . "\e$,4!j\e(B")
    ("\e$,15o6-5p\e(B" . "\e$,4!k")\e(B")
    ("\e$,15o6-5p6-\e(B" . "\e$,4!k\e(B")
    ("\e$,16-5o\e(B" . "\e$,4!l\e(B") ;; when every other lig. fails.

    ("\e$,15p\e(B" . "\e$,4!n"W\e(B")
    ;; ("\e$,15p6-\e(B" . "\\e$,4"'\e(B") ;; special case. only the topmost pos.
    ("\e$,15q\e(B" . "\e$,4!n"#"W\e(B")
    ("\e$,15q6-\e(B" . "\e$,4!m\e(B") ;; IS 13194 special rule.
    ("\e$,15p6!\e(B" . "\e$,4!o"[\e(B")
    ("\e$,15p6"\e(B" . "\e$,4!p"\\e(B")

    ("\e$,15r\e(B" . "\e$,4!q")\e(B")
    ("\e$,15r6-\e(B" . "\e$,4!q\e(B")
    ("\e$,15s\e(B" . "\e$,4!s\e(B")
    ("\e$,15s6-\e(B" . "\e$,4!r\e(B")
    ("\e$,15t\e(B" . "\e$,4!s"#\e(B")
    ("\e$,15t6-\e(B" . "\e$,4!r"#\e(B")

    ("\e$,15u\e(B" . "\e$,4!t")\e(B")
    ("\e$,15u6-\e(B" . "\e$,4!t\e(B")
    ("\e$,15u6-5p\e(B" . "\e$,4!u")\e(B")
    ("\e$,15u6-5p6-\e(B" . "\e$,4!u\e(B")
    ;; SIBILANTS
    ("\e$,15v\e(B" . "\e$,4!v")\e(B")
    ("\e$,15v6-\e(B" . "\e$,4!v\e(B")
    ("\e$,15v6-5u\e(B" . "\e$,4!w")\e(B")
    ("\e$,15v6-5u6-\e(B" . "\e$,4!w\e(B")
    ("\e$,15v6-5p\e(B" . "\e$,4!x")\e(B")
    ("\e$,15v6-5p6-\e(B" . "\e$,4!x\e(B")

    ("\e$,15w\e(B" . "\e$,4!y")\e(B")
    ("\e$,15w6-\e(B" . "\e$,4!y\e(B")
    ("\e$,15x\e(B" . "\e$,4!z")\e(B")
    ("\e$,15x6-\e(B" . "\e$,4!z\e(B")
    ("\e$,15x6-5p\e(B" . "\e$,4!{")\e(B")
    ("\e$,15x6-5p6-\e(B" . "\e$,4!{\e(B")

    ("\e$,15y\e(B" . "\e$,4!}\e(B")
    ("\e$,15y6-\e(B" . "\e$,4!|\e(B")
    ("\e$,15y6#\e(B" . "\e$,4!~\e(B")
    ("\e$,15y6-5p\e(B" . "\e$,4!\7f\e(B")
    ("\e$,15y6-5n\e(B" . "\e$,4" \e(B")
    ("\e$,15y6-5o\e(B" . "\e$,4"!\e(B")
    ;; NUKTAS
    ("\e$,168\e(B" . "\e$,4 f"R"S\e(B")
    ("\e$,1686-\e(B" . "\e$,4 d\e(B")
    ("\e$,169\e(B" . "\e$,4 k")\e(B")
    ("\e$,1696-\e(B" . "\e$,4 k\e(B")
    ("\e$,16:\e(B" . "\e$,4 n")\e(B")
    ("\e$,16:6-\e(B" . "\e$,4 n\e(B")
    ("\e$,16;\e(B" . "\e$,4 w")\e(B")
    ("\e$,16;6-\e(B" . "\e$,4 w\e(B")
    ("\e$,16<\e(B" . "\e$,4!#"W\e(B")
    ("\e$,16=\e(B" . "\e$,4!'"X\e(B")
    ("\e$,16>\e(B" . "\e$,4!b"[\e(B")
    ("\e$,16>6-\e(B" . "\e$,4!>\e(B")
    ("\e$,16?\e(B" . "\e$,4!j"#")\e(B")
    ;; misc modifiers.
    ("\e$,15A\e(B" . "\\e$,4"$\e(B")
    ("\e$,15B\e(B" . "\\e$,4"&\e(B")
    ("\e$,15C\e(B" . "\e$,4 F\e(B")
    ("\e$,15|\e(B" . "\e$,4"#\e(B")
    ("\e$,15}\e(B" . "\e$,4 E\e(B")
    ("\e$,16-\e(B" . "\e$,4""\e(B")
    ("\e$,16-5p\e(B" . "\e$,4"%\e(B") ;; following "r"
    ;; ("\e$,160\e(B" . "\e$,4 D\e(B")
    ;; ("\e$,16D\e(B" . "\e$,4 J\e(B")
    ;; ("\e$,16F\e(B" . "")
    ;; ("\e$,16G\e(B" . "")
    ;; ("\e$,16H\e(B" . "")
    ;; ("\e$,16I\e(B" . "")
    ;; ("\e$,16J\e(B" . "")
    ;; ("\e$,16K\e(B" . "")
    ;; ("\e$,16L\e(B" . "")
    ;; ("\e$,16M\e(B" . "")
    ;; ("\e$,16N\e(B" . "")
    ;; ("\e$,16O\e(B" . "")
    )
  "Devanagari characters to glyphs conversion table.
Default value contains only the basic rules. You may add your own
preferred rule from the sanskrit fonts." )
| 428 | |
(defvar dev-char-glyph-hash
  ;; `make-hash-table' replaces the obsolete `makehash'.
  (let ((hash (make-hash-table :test 'equal)))
    (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
          dev-char-glyph)
    hash)
  "Hash table built from the alist `dev-char-glyph'.
Each key is the car of an alist entry and each value the cdr; keys
are compared with `equal'.")
| 434 | |
;; Regexp matching any key of `dev-char-glyph-hash', produced by
;; `devanagari-regexp-of-hashtbl-keys'.
;; `devanagari-compose-syllable-region' searches for it to find the
;; next character sequence to translate into glyphs.
(defvar dev-char-glyph-regexp
  (devanagari-regexp-of-hashtbl-keys dev-char-glyph-hash))
| 437 | |
| 438 | ;; glyph-to-glyph conversion table. |
| 439 | ;; it is supposed that glyphs are ordered in |
| 440 | ;; [consonant/nukta] - [matra/halant] - [preceding-r] - [anuswar]. |
| 441 | |
;; Alist of (GLYPHS . REPLACEMENT) pairs: a string of font glyph
;; characters and the glyph string that replaces it.  The pairs are
;; loaded into `dev-glyph-glyph-hash', and `dev-glyph-glyph-regexp' is
;; built from the keys.
(defvar dev-glyph-glyph
  '(("\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"(\e(B")
    ("\\e$,4"'\e(B\\e$,4"$\e(B" . "\\e$,4"(\e(B")
    ("\e$,4"*\e(B\\e$,4"&\e(B" . "\e$,4"+\e(B")
    ("\e$,4"*\e(B\\e$,4"'\e(B" . "\e$,4",\e(B")
    ("\e$,4"*\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\e$,4"-\e(B")
    ("\e$,4"2\e(B\\e$,4"&\e(B" . "\e$,4"3\e(B")
    ("\e$,4"2\e(B\\e$,4"'\e(B" . "\e$,4"4\e(B")
    ("\e$,4"2\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\e$,4"5\e(B")
    ("\e$,4"#\e(B\\e$,4"6\e(B" . "\e$,4"7\e(B")
    ("\e$,4"%\e(B\\e$,4"6\e(B" . "\e$,4"8\e(B")
    ;;("\e$,4"6\e(B" . "\e$,4"9\e(B")
    ("\e$,4"#\e(B\\e$,4":\e(B" . "\e$,4";\e(B")
    ("\e$,4"%\e(B\\e$,4":\e(B" . "\e$,4"<\e(B")
    ;;("\e$,4":\e(B" . "\e$,4"=\e(B")
    ("\\e$,4"@\e(B\\e$,4"&\e(B" . "\\e$,4"A\e(B")
    ("\\e$,4"@\e(B\\e$,4"'\e(B" . "\\e$,4"B\e(B")
    ("\\e$,4"@\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"C\e(B")
    ("\\e$,4"D\e(B\\e$,4"&\e(B" . "\\e$,4"E\e(B")
    ("\\e$,4"D\e(B\\e$,4"'\e(B" . "\\e$,4"F\e(B")
    ("\\e$,4"D\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"G\e(B")
    ("\\e$,4"H\e(B\\e$,4"&\e(B" . "\\e$,4"I\e(B")
    ("\\e$,4"H\e(B\\e$,4"'\e(B" . "\\e$,4"J\e(B")
    ("\\e$,4"H\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"K\e(B")
    ("\\e$,4"L\e(B\\e$,4"&\e(B" . "\\e$,4"M\e(B")
    ("\\e$,4"L\e(B\\e$,4"'\e(B" . "\\e$,4"N\e(B")
    ("\\e$,4"L\e(B\\e$,4"'\e(B\\e$,4"&\e(B" . "\\e$,4"O\e(B")
    ))
(defvar dev-glyph-glyph-hash
  ;; `make-hash-table' replaces the obsolete `makehash'.
  (let ((hash (make-hash-table :test 'equal)))
    (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
          dev-glyph-glyph)
    hash)
  "Hash table built from the alist `dev-glyph-glyph'.
Each key is the car of an alist entry and each value the cdr; keys
are compared with `equal'.")
;; Regexp matching any key of `dev-glyph-glyph-hash', produced by
;; `devanagari-regexp-of-hashtbl-keys'.
;; `devanagari-compose-syllable-region' matches it against the glyph
;; string during its glyph-to-glyph conversion step.
(defvar dev-glyph-glyph-regexp
  (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-hash))
| 477 | |
| 478 | |
;; Yet another glyph-to-glyph conversion table.
;; Alist of (GLYPH . REPLACEMENT) pairs, loaded into
;; `dev-glyph-glyph-2-hash'.  `devanagari-compose-syllable-region'
;; consults it only when more than one consonant was counted.
(defvar dev-glyph-glyph-2
  '(("\e$,4"*\e(B" . "\e$,4".\e(B")
    ("\e$,4"+\e(B" . "\e$,4"/\e(B")
    ("\e$,4",\e(B" . "\e$,4"0\e(B")
    ("\e$,4"-\e(B" . "\e$,4"1\e(B")))
(defvar dev-glyph-glyph-2-hash
  ;; `make-hash-table' replaces the obsolete `makehash'.
  (let ((hash (make-hash-table :test 'equal)))
    (mapc (function (lambda (x) (puthash (car x) (cdr x) hash)))
          dev-glyph-glyph-2)
    hash)
  "Hash table built from the alist `dev-glyph-glyph-2'.
Each key is the car of an alist entry and each value the cdr; keys
are compared with `equal'.")
;; Regexp matching any key of `dev-glyph-glyph-2-hash', produced by
;; `devanagari-regexp-of-hashtbl-keys'.
(defvar dev-glyph-glyph-2-regexp
  (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash))
| 492 | |
| 493 | |
(defun dev-charseq (from &optional to)
  "Return the list of Devanagari glyph characters for codes FROM to TO.
Each integer in the range is converted with `indian-glyph-char' for
the script `devanagari'.  When TO is omitted or nil, only FROM is
converted."
  (let ((last-code (or to from)))
    (mapcar (lambda (code) (indian-glyph-char code 'devanagari))
            (devanagari-range from last-code))))
| 498 | |
;; List of glyph characters for the glyph codes #x2b, #x3c..#xc1 and
;; #xc3, as produced by `dev-charseq'.  `dev-glyph-order' assigns this
;; class the composition order 1.
(defvar dev-glyph-cvn
  (append
   (dev-charseq #x2b)
   (dev-charseq #x3c #xc1)
   (dev-charseq #xc3))
  "Devanagari Consonants/Vowels/Nukta Glyphs")
| 505 | |
;; List of glyph characters for the glyph codes #xf0..#xfe.
;; `dev-glyph-order' assigns this class the composition order 2.
(defvar dev-glyph-space
  (dev-charseq #xf0 #xfe)
  "Devanagari Spacing Glyphs")
| 509 | |
;; List of glyph characters for the glyph codes #xc9 and #xd2..#xd5.
;; `dev-glyph-order' assigns this class the composition order 3.
(defvar dev-glyph-right-modifier
  (append
   (dev-charseq #xc9)
   (dev-charseq #xd2 #xd5))
  "Devanagari Modifiers attached at the right side.")
| 515 | |
;; Regexp character class made of every glyph character in
;; `dev-glyph-right-modifier'.  `devanagari-compose-syllable-region'
;; uses it to decide which ordering rule applies to a glyph block.
(defvar dev-glyph-right-modifier-regexp
  (concat "[" dev-glyph-right-modifier "]"))
| 518 | |
;; List of glyph characters for the glyph codes #xca..#xd1.
;; `dev-glyph-order' assigns this class the composition order 3, and
;; each glyph in it also receives the `reference-point' property (3 . 5).
(defvar dev-glyph-left-matra
  (dev-charseq #xca #xd1)
  "Devanagari Matras attached at the left side.")
| 522 | |
;; List of glyph characters for the glyph codes #xe0..#xef.
;; `dev-glyph-order' assigns this class the composition order 4.
(defvar dev-glyph-top-matra
  (dev-charseq #xe0 #xef)
  "Devanagari Matras attached at the top side.")
| 526 | |
;; List of glyph characters for the glyph codes #xd6..#xdf and #xc2.
;; `dev-glyph-order' assigns this class the composition order 8.
(defvar dev-glyph-bottom-modifier
  (append
   (dev-charseq #xd6 #xdf)
   (dev-charseq #xc2))
  "Devanagari Modifiers attached at the bottom.")
| 532 | |
(defvar dev-glyph-order
  (list (cons dev-glyph-cvn 1)
        (cons dev-glyph-space 2)
        (cons dev-glyph-right-modifier 3)
        (cons dev-glyph-left-matra 3) ;; processed by reference point.
        (cons dev-glyph-top-matra 4)
        (cons (dev-charseq #xc7 #xc8) 5)
        (cons (dev-charseq #xc6) 6)
        (cons (dev-charseq #xc5) 7)
        (cons dev-glyph-bottom-modifier 8))
  "Alist mapping glyph classes to their composition order.
Each element has the form (GLYPHS . ORDER), where GLYPHS is a list of
glyph characters and ORDER is the integer stored as the
`composition-order' property of each of them.")
| 543 | |
;; Store on every glyph of each class in `dev-glyph-order' its
;; `composition-order' property.
(dolist (class dev-glyph-order)
  (dolist (glyph (car class))
    (put-char-code-property glyph 'composition-order (cdr class))))

;; Left matras get their own reference point.
(dolist (glyph dev-glyph-left-matra)
  (put-char-code-property glyph 'reference-point '(3 . 5)))
| 556 | |
(defun devanagari-compose-syllable-string (string)
  "Return STRING composed as one Devanagari syllable.
STRING is first passed through `decompose-string', then inserted
into a temporary buffer whose whole contents are handed to
`devanagari-compose-syllable-region'; the resulting buffer contents
are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (devanagari-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
| 562 | |
(defun devanagari-compose-syllable-region (from to)
  "Compose devanagari syllable in region FROM to TO.
The characters of the region are translated into font glyphs through
`dev-char-glyph-hash', adjusted through `dev-glyph-glyph-hash' and
`dev-glyph-glyph-2-hash', reordered block by block according to the
`composition-order' property of each glyph, and finally passed to
`compose-region' interleaved with per-glyph reference points.

If the conversion yields no glyph at all (no character of the region
has an entry in the glyph table), the region is left uncomposed and
nil is returned."
  (let ((glyph-str nil) (cons-num 0) glyph-str-list
        (last-halant nil) (preceding-r nil) (last-modifier nil)
        (last-char (char-before to)) match-str
        glyph-block split-pos)
    (save-excursion
      (save-restriction
        ;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last halant must be preserved.
        (if (eq last-char ?\e$,16-\e(B)
            (progn
              (setq last-halant t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to)
          ;; note if the last char is modifier.
          (if (or (eq last-char ?\e$,15A\e(B) (eq last-char ?\e$,15B\e(B))
              (setq last-modifier t)))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r halant" must be modifier.
        (when (looking-at "\e$,15p6-\e(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; translate the rest characters into glyphs
        (while (re-search-forward dev-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          (setq glyph-str
                (concat glyph-str
                        (gethash match-str dev-char-glyph-hash)))
          ;; count the number of consonant-glyphs.
          (if (string-match devanagari-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; preceding-r must be attached before the anuswar if exists.
        ;; The `glyph-str' test keeps `substring' from being called on
        ;; nil when nothing was translated.
        (if preceding-r
            (if (and last-modifier glyph-str)
                (setq glyph-str (concat (substring glyph-str 0 -1)
                                        "\e$,4"'\e(B" (substring glyph-str -1)))
              (setq glyph-str (concat glyph-str "\e$,4"'\e(B"))))
        (if last-halant (setq glyph-str (concat glyph-str "\e$,4""\e(B")))
        ;; Nothing was translated when `glyph-str' is still nil (e.g. the
        ;; region holds only characters without a glyph entry); the
        ;; remaining steps all require a string, so skip them.
        (when glyph-str
          ;; *** glyph-to-glyph conversion ***
          (when (string-match dev-glyph-glyph-regexp glyph-str)
            (setq glyph-str
                  (replace-match (gethash (match-string 0 glyph-str)
                                          dev-glyph-glyph-hash)
                                 nil t glyph-str))
            (if (and (> cons-num 1)
                     (string-match dev-glyph-glyph-2-regexp glyph-str))
                (setq glyph-str
                      (replace-match (gethash (match-string 0 glyph-str)
                                              dev-glyph-glyph-2-hash)
                                     nil t glyph-str))))
          ;; *** glyph reordering ***
          ;; Cut one block at a time: up to and including the next halant
          ;; glyph, or up to the last glyph of the string.
          (while (setq split-pos (string-match "\e$,4""\e(B\\|.$" glyph-str))
            (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
            (setq glyph-str (substring glyph-str (1+ split-pos)))
            (setq
             glyph-block
             (if (string-match dev-glyph-right-modifier-regexp glyph-block)
                 ;; With a right modifier: plain ascending order.
                 (sort (string-to-list glyph-block)
                       (function (lambda (x y)
                                   (< (get-char-code-property x 'composition-order)
                                      (get-char-code-property y 'composition-order)))))
               ;; Otherwise: ascending order, except that glyphs of
               ;; order 2 (spacing) go last.
               (sort (string-to-list glyph-block)
                     (function (lambda (x y)
                                 (let ((xo (get-char-code-property x 'composition-order))
                                       (yo (get-char-code-property y 'composition-order)))
                                   (if (= xo 2) nil (if (= yo 2) t (< xo yo)))))))))
            (setq glyph-str-list (nconc glyph-str-list glyph-block)))
          ;; concatenate and attach reference-points.
          ;; Each glyph is preceded by its reference point; the `cdr'
          ;; drops the reference point in front of the first glyph.
          (setq glyph-str
                (cdr
                 (apply
                  'nconc
                  (mapcar
                   (function (lambda (x)
                               (list
                                (or (get-char-code-property x 'reference-point)
                                    '(5 . 3) ;; default reference point.
                                    )
                                x)))
                   glyph-str-list)))))))
    ;; Compose only when there are glyph components to show.
    (when glyph-str
      (compose-region from to glyph-str))))
| 645 | |
| 646 | (provide 'devan-util) |
| 647 | |
| 648 | ;;; devan-util.el ends here |