| 1 | ;;; knd-util.el --- Support for composing Kannada characters |
| 2 | |
| 3 | ;; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 |
| 4 | ;; Free Software Foundation, Inc. |
| 5 | |
| 6 | ;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org> |
| 7 | ;; Keywords: multilingual, Kannada |
| 8 | |
| 9 | ;; This file is part of GNU Emacs. |
| 10 | |
| 11 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 12 | ;; it under the terms of the GNU General Public License as published by |
| 13 | ;; the Free Software Foundation; either version 3, or (at your option) |
| 14 | ;; any later version. |
| 15 | |
| 16 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 19 | ;; GNU General Public License for more details. |
| 20 | |
| 21 | ;; You should have received a copy of the GNU General Public License |
| 22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 23 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 24 | ;; Boston, MA 02110-1301, USA. |
| 25 | |
| 26 | ;; Created: Jul. 14. 2003 |
| 27 | |
| 28 | ;;; Commentary: |
| 29 | |
| 30 | ;; This file provides character(Unicode) to glyph(CDAC) conversion and |
| 31 | ;; composition of Kannada script characters. |
| 32 | |
| 33 | ;;; Code: |
| 34 | |
| 35 | ;;;###autoload |
| 36 | |
| 37 | ;; Kannada Composable Pattern |
| 38 | ;; C .. Consonants |
| 39 | ;; V .. Vowel |
| 40 | ;; H .. Virama |
| 41 | ;; M .. Matra |
| 42 | ;; V .. Vowel |
| 43 | ;; (N .. Zerowidth Non Joiner) |
| 44 | ;; (J .. Zerowidth Joiner. ) |
| 45 | ;; 1. vowel |
| 46 | ;; V(A)? |
| 47 | ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) |
| 48 | ;; ((CH)?(CH)?(CH)?CH)?C(H|M?)? |
| 49 | |
(defconst kannada-consonant
  "[\e$,1>u\e(B-\e$,1?9\e(B]"
  "Regexp (a single character class) matching one Kannada consonant.")
| 52 | |
(defconst kannada-consonant-needs-twirl
  "[\e$,1>u>w\e(B-\e$,1>{>}\e(B-\e$,1>~? \e(B-\e$,1?"?$\e(B-\e$,1?+?-?0?3\e(B-\e$,1?9\e(B]\\(\e$,1?M\e(B[\e$,1>u\e(B-\e$,1?9\e(B]\\)*[\e$,1?A?B?C?D>b\e(B]?$"
  "Regexp matching text for which an extra \"twirl\" glyph is appended.
It matches one consonant from a fixed set, then any number of
virama + consonant pairs, then optionally one trailing sign, up to
the end of the line.  `kannada-compose-syllable-region' searches for
it when the syllable does not end in a virama.")
| 55 | |
(defconst kannada-composable-pattern
  (let* ((consonant "[\e$,1>u\e(B-\e$,1?9\e(B]")
         (virama "\e$,1?M\e(B")
         ;; One optional "consonant + virama" pair.
         (optional-half (concat "\\(?:" consonant virama "\\)?")))
    (concat
     ;; Alternative 1: an independent vowel or sign.
     "\\([\e$,1>b\e(B-\e$,1>t?`>l\e(B]\\)\\|[\e$,1>c\e(B]"
     ;; Alternative 2: a consonant syllable.
     "\\|\\("
     ;; Up to four leading "consonant + virama" pairs.
     "\\(?:" optional-half optional-half optional-half consonant virama "\\)?"
     ;; The final consonant, optionally followed by a virama or a matra.
     consonant "\\(?:" virama "\\|[\e$,1?>\e(B-\e$,1?M?U?C\e(B]?\\)?"
     "\\)"))
  "Regexp matching a composable sequence of Kannada characters.")
| 64 | |
| 65 | ;;;###autoload |
(defun kannada-compose-region (from to)
  "Compose every Kannada syllable found between FROM and TO.
Interactively, operate on the region.  Each match of
`kannada-composable-pattern' is handed to
`kannada-compose-syllable-region'."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward kannada-composable-pattern nil t)
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (kannada-compose-syllable-region syllable-start syllable-end))))))
| 75 | ;;;###autoload |
(defun kannada-compose-string (string)
  "Return STRING with its Kannada syllables composed.
STRING is first passed through `decompose-string', then composed in
a temporary buffer with `kannada-compose-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-region (point-min) (point-max))
      (buffer-string))))
| 81 | |
| 82 | ;;;###autoload |
(defun kannada-post-read-conversion (len)
  "Compose the LEN characters following point.
The buffer's modified flag is restored afterwards.  Return the size
of the converted text."
  (save-excursion
    (save-restriction
      (let ((modified-before (buffer-modified-p)))
        (narrow-to-region (point) (+ (point) len))
        (kannada-compose-region (point-min) (point-max))
        ;; Composing must not mark the freshly read buffer as modified.
        (set-buffer-modified-p modified-before)
        (- (point-max) (point-min))))))
| 91 | |
(defun kannada-range (from to)
  "Return the list of integers FROM, FROM+1, ..., TO.
Return nil when FROM is greater than TO."
  (let ((numbers nil)
        (n to))
    ;; Walk downwards so that consing yields an ascending list.
    (while (>= n from)
      (setq numbers (cons n numbers)
            n (1- n)))
    numbers))
| 96 | |
(defun kannada-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches any key of hash table HASHTBL.
The keys must be strings; they are collected, ordered longest first
and handed to `regexp-opt'."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key value) (setq keys (cons key keys)))
             hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
| 106 | |
(defun kannada-regexp-of-hashtbl-vals (hashtbl)
  "Return a regular expression that matches any value of hash table HASHTBL.
The values are collected, ordered longest first and handed to
`regexp-opt'."
  (let ((max-specpdl-size 1000)
        (values nil))
    (maphash (lambda (key value) (setq values (cons value values)))
             hashtbl)
    (regexp-opt
     (sort values (lambda (a b) (> (length a) (length b)))))))
| 116 | |
(defun kannada-composition-function (from to pattern &optional string)
  "Compose the Kannada syllable in FROM..TO, or in STRING when given.
The text is assumed to match the composable PATTERN regexp in full;
PATTERN itself is not consulted here.  Return the value of TO minus
FROM."
  (cond (string
         (kannada-compose-syllable-string string))
        (t
         (kannada-compose-syllable-region from to)))
  (- to from))
| 124 | |
| 125 | ;; Register a function to compose Kannada characters. |
;; Every code point of the Kannada block (U+0C80..U+0CFF) gets an entry
;; pairing the composable pattern with the composition function.
(dolist (ucs (kannada-range #x0c80 #x0cff))
  (aset composition-function-table (decode-char 'ucs ucs)
        (list (cons kannada-composable-pattern
                    'kannada-composition-function))))
| 132 | |
| 133 | ;; Notes on conversion steps. |
| 134 | |
| 135 | ;; 1. chars to glyphs |
| 136 | ;; |
| 137 | ;; Rules will not be applied to a virama appearing at the end of the |
| 138 | ;; text. Also, a preceding/following "r" is treated as a special case. |
| 139 | |
| 140 | ;; 2. glyphs reordering. |
| 141 | ;; |
| 142 | ;; The glyphs are split at each virama, and each glyph group is |
| 143 | ;; re-ordered in the following order. |
| 144 | ;; |
| 145 | ;; Note that `consonant-glyph' mentioned here does not contain the |
| 146 | ;; vertical bar (right modifier) attached at the right of the |
| 147 | ;; consonant. |
| 148 | ;; |
| 149 | ;; If the glyph-group contains right modifier, |
| 150 | ;; (1) consonant-glyphs/vowels |
| 151 | ;; (2) spacing |
| 152 | ;; (3) right modifier (may be matra) |
| 153 | ;; (4) top matra |
| 154 | ;; (5) preceding "r" |
| 155 | ;; (7) following "r" |
| 156 | ;; (8) bottom matra or virama. |
| 157 | ;; |
| 158 | ;; Otherwise, |
| 159 | ;; (1) consonant-glyph/vowels, with nukta sign |
| 160 | ;; (3) left matra |
| 161 | ;; (4) top matra |
| 162 | ;; (5) preceding "r" |
| 163 | ;; (7) following "r" |
| 164 | ;; (8) bottom matra or virama. |
| 165 | ;; (2) spacing |
| 166 | |
| 167 | ;; 3. glyph to glyph |
| 168 | ;; |
| 169 | ;; For better display, some glyph display would be tuned. |
| 170 | |
| 171 | ;; 4. Composition. |
| 172 | ;; |
| 173 | ;; left modifiers will be attached at the left. |
| 174 | ;; others will be attached right. |
| 175 | |
| 176 | ;; Problem:: |
| 177 | ;; Can we generalize these methods to other Indian scripts? |
| 178 | |
(defvar knd-char-glyph
  ;; Each entry is (CHARACTERS . GLYPHS).  CHARACTERS is a Kannada
  ;; character or a consonant followed by a virama/matra; GLYPHS is the
  ;; glyph string it is rendered with, or nil.  The table is loaded into
  ;; `knd-char-glyph-hash', so a key must appear only once: a later
  ;; duplicate silently replaces an earlier entry.
  '(("\e$,1>e\e(B" . "\e$,43@\e(B")
    ("\e$,1>f\e(B" . "\e$,43A\e(B")
    ("\e$,1?>\e(B" . "\e$,44{\e(B")
    ("\e$,1>g\e(B" . "\e$,43B\e(B")
    ("\e$,1??\e(B" . nil)
    ("\e$,1>h\e(B" . "\e$,43C\e(B")
    ("\e$,1?@\e(B" . nil)
    ("\e$,1>i\e(B" . "\e$,43D\e(B")
    ("\e$,1?A\e(B" . "\\e$,44\7f\e(B")
    ("\e$,1>j\e(B" . "\e$,43E\e(B")
    ("\e$,1?B\e(B" . "\\e$,45 \e(B")
    ("\e$,1>k\e(B" . "\e$,43F4\7f\e(B")
    ("\e$,1?C\e(B" . "\\e$,45$\e(B")
    ("\e$,1?`\e(B" . "\e$,43F5 \e(B")
    ("\e$,1?D\e(B" . "\\e$,45%\e(B")
    ;;("\e$,1>l\e(B" . nil) ; not implemented.
    ;;("\e$,1?a\e(B" . nil)
    ("\e$,1>n\e(B" . "\e$,43G\e(B")
    ("\e$,1>o\e(B" . "\e$,43H\e(B")
    ("\e$,1>p\e(B" . "\e$,43I\e(B")
    ("\e$,1?F\e(B" . "\\e$,45&\e(B")
    ("\e$,1?G\e(B" . "\\e$,45&4~\e(B")
    ("\e$,1?H\e(B" . "\\e$,45&5'\e(B")
    ("\e$,1>r\e(B" . "\e$,43J\e(B")
    ("\e$,1?J\e(B" . "\e$,45&5 \e(B")
    ("\e$,1>s\e(B" . "\e$,43K\e(B")
    ("\e$,1?K\e(B" . "\\e$,45&5 4~\e(B")
    ("\e$,1>t\e(B" . "\e$,43L\e(B")
    ("\e$,1?L\e(B" . "\\e$,45(\e(B")
    ("\e$,1>b\e(B" . "\e$,43M\e(B")
    ("\e$,1>c\e(B" . "\e$,43N\e(B")
    ("\e$,1>u?M\e(B" . "\e$,43O5)\e(B") ("\e$,1>u\e(B" . "\e$,43O\e(B") ("\e$,1>u??\e(B" . "\e$,43P\e(B") ("\e$,1>u?@\e(B" . "\e$,43P4~\e(B")
    ("\e$,1>v?M\e(B" . "\e$,43S5)\e(B") ("\e$,1>v\e(B" . "\e$,43S\e(B") ("\e$,1>v??\e(B" . "\e$,43T\e(B") ("\e$,1>v?@\e(B" . "\e$,43T4~\e(B") ("\e$,1>v?F\e(B" . "\e$,43S5&\e(B") ("\e$,1>v?G\e(B" . "\e$,43S5&4~\e(B") ("\e$,1>v?H\e(B" . "\e$,43S5&5'\e(B") ("\e$,1>v?J\e(B" . "\e$,43S5&5&5 \e(B") ("\e$,1>v?K\e(B" . "\e$,43S5&5&5 4~\e(B") ("\e$,1>v?L\e(B" . "\e$,43S5(\e(B")
    ("\e$,1>w?M\e(B" . "\e$,43V5)\e(B") ("\e$,1>w\e(B" . "\e$,43V\e(B") ("\e$,1>w??\e(B" . "\e$,43W\e(B") ("\e$,1>w?@\e(B" . "\e$,43W4~\e(B")
    ("\e$,1>x?M\e(B" . "\e$,43Y5)\e(B") ("\e$,1>x\e(B" . "\e$,43Y\e(B") ("\e$,1>x??\e(B" . "\e$,43Z\e(B") ("\e$,1>x?@\e(B" . "\e$,43Z4~\e(B")
    ("\e$,1>y?M\e(B" . "\e$,43\5)\e(B") ("\e$,1>y\e(B" . "\e$,43\\e(B")
    ("\e$,1>z?M\e(B" . "\e$,43^5)\e(B") ("\e$,1>z\e(B" . "\e$,43^\e(B") ("\e$,1>z??\e(B" . "\e$,43_\e(B") ("\e$,1>z?@\e(B" . "\e$,43_4~\e(B")
    ("\e$,1>{?M\e(B" . "\e$,43a5)\e(B") ("\e$,1>{\e(B" . "\e$,43a\e(B") ("\e$,1>{??\e(B" . "\e$,43b\e(B") ("\e$,1>{?@\e(B" . "\e$,43b4~\e(B")
    ("\e$,1>|?M\e(B" . "\e$,43d5)\e(B") ("\e$,1>|\e(B" . "\e$,43d\e(B") ("\e$,1>|??\e(B" . "\e$,43f\e(B") ("\e$,1>|?@\e(B" . "\e$,43f4~\e(B") ("\e$,1>|?F\e(B" . "\e$,43e5&\e(B") ("\e$,1>|?G\e(B" . "\e$,43e5&4~\e(B") ("\e$,1>|?H\e(B" . "\e$,43e5&5'\e(B") ("\e$,1>|?J\e(B" . "\e$,43e5&5&5 \e(B") ("\e$,1>|?K\e(B" . "\e$,43e5&5&5 4~\e(B") ("\e$,1>|?L\e(B" . "\e$,43e5(\e(B")
    ("\e$,1>}?M\e(B" . "\e$,44a4z3h4\7f5)\e(B") ("\e$,1>}\e(B" . "\e$,44a4z3h4\7f\e(B") ("\e$,1>}??\e(B" . "\e$,44b3h4\7f\e(B") ("\e$,1>}?@\e(B" . "\e$,44b3h4\7f4~\e(B") ("\e$,1>}?B\e(B". "\e$,44a4z3h5 \e(B") ("\e$,1>}?J\e(B". "\e$,44a5&3h5 \e(B") ("\e$,1>}?K\e(B". "\e$,44a5&3h5 4~\e(B")
    ("\e$,1>~?M\e(B" . "\e$,43j5)\e(B") ("\e$,1>~\e(B" . "\e$,43j\e(B")
    ;; The third entry of this row is keyed on this row's own consonant,
    ;; like the corresponding entries of the other rows that have one.
    ("\e$,1>\7f?M\e(B" . "\e$,43m5)\e(B") ("\e$,1>\7f\e(B" . "\e$,43l\e(B") ("\e$,1>\7f?>\e(B" . "\e$,43m4{\e(B") ("\e$,1>\7f??\e(B" . "\e$,43n\e(B") ("\e$,1>\7f?@\e(B" . "\e$,43n4~\e(B") ("\e$,1>\7f?F\e(B" . "\e$,43m5&\e(B") ("\e$,1>\7f?G\e(B" . "\e$,43m5&4~\e(B") ("\e$,1>\7f?H\e(B" . "\e$,43m5&5'\e(B") ("\e$,1>\7f?J\e(B" . "\e$,43m5&5&5 \e(B") ("\e$,1>\7f?K\e(B" . "\e$,43m5&5&5 4~\e(B") ("\e$,1>\7f?L\e(B" . "\e$,43m5(\e(B")
    ("\e$,1? ?M\e(B" . "\e$,43p5)\e(B") ("\e$,1? \e(B" . "\e$,43p\e(B") ("\e$,1? ??\e(B" . "\e$,43q\e(B") ("\e$,1? ?@\e(B" . "\e$,43q4~\e(B")
    ("\e$,1?!?M\e(B" . "\e$,43s5)\e(B") ("\e$,1?!\e(B" . "\e$,43s\e(B") ("\e$,1?!??\e(B" . "\e$,43t\e(B") ("\e$,1?!?@\e(B" . "\e$,43t4~\e(B")
    ("\e$,1?"?M\e(B" . "\e$,43v5)\e(B") ("\e$,1?"\e(B" . "\e$,43v\e(B") ("\e$,1?"??\e(B" . "\e$,43w\e(B") ("\e$,1?"?@\e(B" . "\e$,43w4~\e(B")
    ("\e$,1?#?M\e(B" . "\e$,43z5)\e(B") ("\e$,1?#\e(B" . "\e$,43y\e(B") ("\e$,1?#?>\e(B" . "\e$,43z4{\e(B") ("\e$,1?#??\e(B" . "\e$,43{\e(B") ("\e$,1?#?@\e(B" . "\e$,43{4~\e(B") ("\e$,1?#?F\e(B" . "\e$,43z5&\e(B") ("\e$,1?#?G\e(B" . "\e$,43z5&4~\e(B") ("\e$,1?#?H\e(B" . "\e$,43z5&5'\e(B") ("\e$,1?#?J\e(B" . "\e$,43z5&5&5 \e(B") ("\e$,1?#?K\e(B" . "\e$,43z5&5&5 4~\e(B") ("\e$,1?#?L\e(B" . "\e$,43z5(\e(B")
    ("\e$,1?$?M\e(B" . "\e$,43}5)\e(B") ("\e$,1?$\e(B" . "\e$,43}\e(B") ("\e$,1?$??\e(B" . "\e$,43~\e(B") ("\e$,1?$?@\e(B" . "\e$,43~4~\e(B")
    ("\e$,1?%?M\e(B" . "\e$,44B5)\e(B") ("\e$,1?%\e(B" . "\e$,44B\e(B") ("\e$,1?%??\e(B" . "\e$,44C\e(B") ("\e$,1?%?@\e(B" . "\e$,44C4~\e(B")
    ("\e$,1?&?M\e(B" . "\e$,44E5)\e(B") ("\e$,1?&\e(B" . "\e$,44E\e(B") ("\e$,1?&??\e(B" . "\e$,44F\e(B") ("\e$,1?&?@\e(B" . "\e$,44F4~\e(B")
    ("\e$,1?'?M\e(B" . "\e$,44H5)\e(B") ("\e$,1?'\e(B" . "\e$,44H\e(B") ("\e$,1?'??\e(B" . "\e$,44I\e(B") ("\e$,1?'?@\e(B" . "\e$,44I4~\e(B")
    ("\e$,1?(?M\e(B" . "\e$,44K5)\e(B") ("\e$,1?(\e(B" . "\e$,44K\e(B") ("\e$,1?(??\e(B" . "\e$,44L\e(B") ("\e$,1?(?@\e(B" . "\e$,44L4~\e(B")
    ("\e$,1?*?M\e(B" . "\e$,44N5)\e(B") ("\e$,1?*\e(B" . "\e$,44N\e(B") ("\e$,1?*??\e(B" . "\e$,44O\e(B") ("\e$,1?*?@\e(B" . "\e$,44O4~\e(B") ("\e$,1?*?A\e(B" . "\e$,44N5"\e(B") ("\e$,1?*?B\e(B" . "\e$,44N5#\e(B") ("\e$,1?*?J\e(B" . "\e$,44N5&5#\e(B") ("\e$,1?*?K\e(B" . "\e$,44N5&5#4~\e(B")
    ("\e$,1?+?M\e(B" . "\e$,44Q5)\e(B") ("\e$,1?+\e(B" . "\e$,44Q\e(B") ("\e$,1?+??\e(B" . "\e$,44R\e(B") ("\e$,1?+?@\e(B" . "\e$,44R4~\e(B") ("\e$,1?+?A\e(B" . "\e$,44Q5"\e(B") ("\e$,1?+?B\e(B" . "\e$,44Q5#\e(B") ("\e$,1?+?J\e(B" . "\e$,44Q5&5#\e(B") ("\e$,1?+?K\e(B" . "\e$,44Q5&5#4~\e(B")
    ("\e$,1?,?M\e(B" . "\e$,44W5)\e(B") ("\e$,1?,\e(B" . "\e$,44V\e(B") ("\e$,1?,?>\e(B". "\e$,44W4{\e(B") ("\e$,1?,??\e(B" . "\e$,44X\e(B") ("\e$,1?,?@\e(B" . "\e$,44X4~\e(B") ("\e$,1?,?F\e(B" . "\e$,44W5&\e(B") ("\e$,1?,?G\e(B" . "\e$,44W5&4~\e(B") ("\e$,1?,?H\e(B" . "\e$,44W5&5'\e(B") ("\e$,1?,?J\e(B" . "\e$,44W5&5&5 \e(B") ("\e$,1?,?K\e(B" . "\e$,44W5&5&5 4~\e(B") ("\e$,1?,?L\e(B" . "\e$,44W5(\e(B")
    ("\e$,1?-?M\e(B" . "\e$,44Z5)\e(B") ("\e$,1?-\e(B" . "\e$,44Z\e(B") ("\e$,1?-??\e(B" . "\e$,44[\e(B") ("\e$,1?-?@\e(B" . "\e$,44[4~\e(B")
    ("\e$,1?.?M\e(B" . "\e$,44h5!5)\e(B") ("\e$,1?.\e(B" . "\e$,44h4z4\7f\e(B") ("\e$,1?.?>\e(B" . "\e$,44h4z5!4{\e(B") ("\e$,1?.??\e(B" . "\e$,44i4\7f\e(B") ("\e$,1?.?@\e(B" . "\e$,44i4\7f4~\e(B") ("\e$,1?.?J\e(B". "\e$,44h5&5 \e(B") ("\e$,1?.?K\e(B". "\e$,44h5&5 4~\e(B")
    ("\e$,1?/?M\e(B" . "\e$,44^4z5!5)\e(B") ("\e$,1?/\e(B" . "\e$,44^4z4\7f\e(B") ("\e$,1?/?>\e(B" . "\e$,44^4z5!4{\e(B")("\e$,1?/??\e(B" . "\e$,44_4\7f\e(B") ("\e$,1?/?@\e(B" . "\e$,44_4\7f4~\e(B") ("\e$,1?/?J\e(B" . "\e$,44^5&5 \e(B") ("\e$,1?/?K\e(B" . "\e$,44^5&5 4~\e(B")
    ("\e$,1?0?M\e(B" . "\e$,44a5)\e(B") ("\e$,1?0\e(B" . "\e$,44a\e(B") ("\e$,1?0??\e(B" . "\e$,44b\e(B") ("\e$,1?0?@\e(B" . "\e$,44b4~\e(B")
    ("\e$,1?2?M\e(B" . "\e$,44e5)\e(B") ("\e$,1?2\e(B" . "\e$,44d\e(B") ("\e$,1?2?>\e(B" . "\e$,44e4{\e(B") ("\e$,1?2??\e(B" . "\e$,44f\e(B") ("\e$,1?2?@\e(B" . "\e$,44f4~\e(B") ("\e$,1?2?F\e(B" . "\e$,44e5&\e(B") ("\e$,1?2?G\e(B" . "\e$,44e5&4~\e(B") ("\e$,1?2?H\e(B" . "\e$,44e5&5'\e(B") ("\e$,1?2?J\e(B" . "\e$,44e5&5&5 \e(B") ("\e$,1?2?K\e(B" . "\e$,44e5&5&5 4~\e(B") ("\e$,1?2?L\e(B" . "\e$,44e5(\e(B")
    ("\e$,1?5?M\e(B" . "\e$,44h5)\e(B") ("\e$,1?5\e(B" . "\e$,44h\e(B") ("\e$,1?5??\e(B" . "\e$,44i\e(B") ("\e$,1?5?@\e(B" . "\e$,44i4~\e(B") ("\e$,1?5?A\e(B" . "\e$,44h5"\e(B") ("\e$,1?5?B\e(B" . "\e$,44h5#\e(B") ("\e$,1?5?J\e(B" . "\e$,44h5&5#\e(B") ("\e$,1?5?K\e(B" . "\e$,44h5&5#4~\e(B")
    ("\e$,1?6?M\e(B" . "\e$,44k5)\e(B") ("\e$,1?6\e(B" . "\e$,44k\e(B") ("\e$,1?6??\e(B" . "\e$,44l\e(B") ("\e$,1?6?@\e(B" . "\e$,44l4~\e(B")
    ("\e$,1?7?M\e(B" . "\e$,44n5)\e(B") ("\e$,1?7\e(B" . "\e$,44n\e(B") ("\e$,1?7??\e(B" . "\e$,44o\e(B") ("\e$,1?7?@\e(B" . "\e$,44o4~\e(B")
    ("\e$,1?8?M\e(B" . "\e$,44q5)\e(B") ("\e$,1?8\e(B" . "\e$,44q\e(B") ("\e$,1?8??\e(B" . "\e$,44r\e(B") ("\e$,1?8?@\e(B" . "\e$,44r4~\e(B")
    ("\e$,1?9?M\e(B" . "\e$,44t5)\e(B") ("\e$,1?9\e(B" . "\e$,44t\e(B") ("\e$,1?9??\e(B" . "\e$,44u\e(B") ("\e$,1?9?@\e(B" . "\e$,44u4~\e(B")
    ("\e$,1?3?M\e(B" . "\e$,44w5)\e(B") ("\e$,1?3\e(B" . "\e$,44w\e(B") ("\e$,1?3??\e(B" . "\e$,44x\e(B") ("\e$,1?3?@\e(B" . "\e$,44x4~\e(B"))
  "Kannada characters to glyphs conversion table.
Default value contains only the basic rules.")
| 248 | |
(defvar knd-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Entries are stored in list order, so a repeated key keeps the
    ;; value of its last occurrence.
    (dolist (entry knd-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table form of `knd-char-glyph': character string -> glyph string.")
| 254 | |
(defvar knd-char-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
  "Regexp matching any key of `knd-char-glyph-hash'.")
| 257 | |
(defvar knd-conjunct-glyph
  ;; One entry per consonant: (CONSONANT . CONJUNCT-GLYPH).
  '(("\e$,1>u\e(B" . "\e$,43Q\e(B") ("\e$,1>v\e(B" . "\e$,43U\e(B") ("\e$,1>w\e(B" . "\e$,43X\e(B") ("\e$,1>x\e(B" . "\e$,43[\e(B") ("\e$,1>y\e(B" . "\e$,43]\e(B")
    ("\e$,1>z\e(B" . "\e$,43`\e(B") ("\e$,1>{\e(B" . "\e$,43c\e(B") ("\e$,1>|\e(B" . "\e$,43g\e(B") ("\e$,1>}\e(B" . "\e$,43i\e(B") ("\e$,1>~\e(B" . "\e$,43k\e(B")
    ("\e$,1>\7f\e(B" . "\e$,43o\e(B") ("\e$,1? \e(B" . "\e$,43r\e(B") ("\e$,1?!\e(B" . "\e$,43u\e(B") ("\e$,1?"\e(B" . "\e$,43x\e(B") ("\e$,1?#\e(B" . "\e$,43|\e(B")
    ("\e$,1?$\e(B" . "\e$,44A\e(B") ("\e$,1?%\e(B" . "\e$,44D\e(B") ("\e$,1?&\e(B" . "\e$,44G\e(B") ("\e$,1?'\e(B" . "\e$,44J\e(B") ("\e$,1?(\e(B" . "\e$,44M\e(B")
    ("\e$,1?*\e(B" . "\e$,44P\e(B") ("\e$,1?+\e(B" . "\e$,44U\e(B") ("\e$,1?,\e(B" . "\e$,44Y\e(B") ("\e$,1?-\e(B" . "\e$,44\\e(B") ("\e$,1?.\e(B" . "\e$,44]\e(B")
    ("\e$,1?/\e(B" . "\e$,44`\e(B") ("\e$,1?0\e(B" . "\e$,44c\e(B") ("\e$,1?2\e(B" . "\e$,44g\e(B") ("\e$,1?3\e(B" . "\e$,44y\e(B") ("\e$,1?5\e(B" . "\e$,44j\e(B")
    ("\e$,1?6\e(B" . "\e$,44m\e(B") ("\e$,1?7\e(B" . "\e$,44p\e(B") ("\e$,1?8\e(B" . "\e$,44s\e(B") ("\e$,1?9\e(B" . "\e$,44v\e(B"))
  "Kannada characters to conjunct glyphs conversion table.
Each element is (CONSONANT . GLYPH), both one-character strings.
`kannada-compose-syllable-region' looks up the glyph for every
consonant after the first one of a syllable.")
| 267 | |
(defvar knd-conjunct-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-conjunct-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table form of `knd-conjunct-glyph': consonant -> conjunct glyph.")
| 273 | |
(defvar knd-conjunct-glyph-regexp
  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
  "Regexp matching any value (conjunct glyph) of `knd-conjunct-glyph-hash'.")
| 276 | |
;; Give the first character of every conjunct glyph string the
;; reference point (5 . 3).
(dolist (entry knd-conjunct-glyph)
  (put-char-code-property (aref (cdr entry) 0) 'reference-point '(5 . 3)))
| 281 | |
| 282 | ;; glyph-to-glyph conversion table. |
| 283 | ;; it is supposed that glyphs are ordered in |
| 284 | ;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar]. |
| 285 | |
(defvar knd-glyph-glyph
  '(("\e$,45$4A\e(B" . "\e$,45*\e(B")
    ("\e$,45'4A\e(B" . "\e$,45+\e(B")
    ("\e$,44A3g\e(B" . "\e$,45,\e(B")
    ("\e$,45$3Q\e(B" . "\e$,45-\e(B"))
  "Glyph-to-glyph conversion table.
Each element is (GLYPHS . REPLACEMENT): a two-glyph sequence and the
single glyph that `kannada-compose-syllable-region' substitutes for it.")
| 291 | |
(defvar knd-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table form of `knd-glyph-glyph': glyph sequence -> replacement.")
(defvar knd-glyph-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)
  "Regexp matching any key (glyph sequence) of `knd-glyph-glyph-hash'.")
| 299 | |
(defun knd-charseq (from &optional to)
  "Return the list of Kannada glyph characters for codes FROM through TO.
TO defaults to FROM, giving a one-element list.  Each code is
converted with `indian-glyph-char'."
  (let ((last-code (or to from)))
    (mapcar (lambda (code) (indian-glyph-char code 'kannada))
            (kannada-range from last-code))))
| 304 | |
(defvar knd-glyph-cv
  ;; Each glyph-code range is listed once.  The list is only a set of
  ;; glyphs that receive composition order 1 through `knd-glyph-order'.
  (append
   (knd-charseq #x40 #x50)
   (knd-charseq #x52 #x54)
   (knd-charseq #x56 #x57)
   (knd-charseq #x59 #x5a)
   (knd-charseq #x5c)
   (knd-charseq #x5e #x5f)
   (knd-charseq #x61 #x62)
   (knd-charseq #x64 #x66)
   (knd-charseq #x6a)
   (knd-charseq #x6c #x6e)
   (knd-charseq #x70 #x71)
   (knd-charseq #x73 #x74)
   (knd-charseq #x76 #x77)
   (knd-charseq #x79 #x7b)
   (knd-charseq #x7d #x7e)
   (knd-charseq #xa2 #xa3)
   (knd-charseq #xa5 #xa6)
   (knd-charseq #xa8 #xa9)
   (knd-charseq #xab #xac)
   (knd-charseq #xae #xaf)
   (knd-charseq #xb1 #xb2)
   (knd-charseq #xb6 #xb8)
   (knd-charseq #xba #xbb)
   (knd-charseq #xbe #xbf)
   (knd-charseq #xc1 #xc2)
   (knd-charseq #xc4 #xc6)
   (knd-charseq #xc8 #xc9)
   (knd-charseq #xcb #xcc)
   (knd-charseq #xce #xcf)
   (knd-charseq #xd1 #xd2)
   (knd-charseq #xd4 #xd5)
   (knd-charseq #xd7 #xd8)
   (knd-charseq #xc3))
  "Kannada Consonants/Vowels/Nukta Glyphs")
| 342 | |
(defvar knd-glyph-space
  (knd-charseq #xb3 #xb4)
  "Kannada spacing glyphs: the glyph characters for codes #xb3 and #xb4.")
| 346 | |
(defvar knd-glyph-right-modifier
  ;; Glyph codes #xdb-#xdd, #xdf-#xe3 and #xe9.
  (append
   (knd-charseq #xdb #xdd)
   (knd-charseq #xdf #xe3)
   (knd-charseq #xe9))
  "Kannada Modifiers attached at the right side.")
| 354 | |
(defvar knd-glyph-right-modifier-regexp
  (concat "[" (apply 'string knd-glyph-right-modifier) "]")
  "Regexp (a character class) matching any glyph in `knd-glyph-right-modifier'.")
| 357 | |
(defvar knd-glyph-jha-tail
  (knd-charseq #x68)
  "Kannada tail for jha: a one-element list holding the glyph for code #x68.")
| 361 | |
(defvar knd-glyph-top-matra
  ;; Four isolated glyph codes, each converted on its own.
  (apply 'append
         (mapcar 'knd-charseq '(#xda #xdd #xe6 #xe8)))
  "Kannada Matras attached at the top side.")
| 369 | |
(defvar knd-glyph-bottom-matra
  ;; Glyph codes #xe4, #xe5 and #xe7.
  (nconc (knd-charseq #xe4 #xe5)
         (knd-charseq #xe7))
  "Kannada Matras attached at the bottom.")
| 375 | |
(defvar knd-glyph-end-marks
  ;; Glyph codes #x25, #x4d, #x4e and #xde.
  (nconc (knd-charseq #x25)
         (knd-charseq #x4d #x4e)
         (knd-charseq #xde))
  "Kannada end marks: arkavattu, virama, au and diirghaa.")
| 382 | |
(defvar knd-glyph-bottom-modifier
  ;; Each element is (FROM) for a single glyph code or (FROM . TO) for
  ;; an inclusive range of glyph codes.
  (apply 'append
         (mapcar (lambda (span) (knd-charseq (car span) (cdr span)))
                 '((#x51) (#x55) (#x58) (#x5b) (#x5d) (#x60) (#x63)
                   (#x67) (#x69) (#x6b) (#x6f) (#x72) (#x75) (#x78)
                   (#x7c) (#xa1) (#xa4) (#xa7) (#xaa) (#xad) (#xb0)
                   (#xb5) (#xb9) (#xbc . #xbd) (#xc0) (#xc3) (#xc7)
                   (#xca) (#xcd) (#xd0) (#xd3) (#xd6) (#xd9)
                   (#xea . #xef))))
  "Kannada Modifiers attached at the bottom.")
| 420 | |
(defvar knd-glyph-order
  (list (cons knd-glyph-cv 1)
        (cons knd-glyph-top-matra 2)
        (cons knd-glyph-jha-tail 3)
        (cons knd-glyph-right-modifier 4)
        (cons knd-glyph-space 5)
        (cons knd-glyph-bottom-modifier 5)
        (cons knd-glyph-bottom-matra 6)
        (cons knd-glyph-end-marks 7))
  "Alist of (GLYPH-LIST . ORDER) giving each glyph class its sort order.
ORDER is stored as the `composition-order' property of every glyph in
GLYPH-LIST; a glyph listed in several classes ends up with the ORDER
of the last class that contains it.")
| 431 | |
;; Record the order of each glyph class on its glyphs.
(dolist (class knd-glyph-order)
  (let ((order (cdr class)))
    (dolist (glyph (car class))
      (put-char-code-property glyph 'composition-order order))))
| 439 | |
(defun kannada-compose-syllable-string (string)
  "Return STRING composed as a single Kannada syllable.
STRING is first passed through `decompose-string', then composed in
a temporary buffer with `kannada-compose-syllable-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
| 445 | |
| 446 | ;; kch |
(defun kannada-compose-syllable-region (from to)
  "Compose the Kannada syllable in the region FROM to TO.
The characters are converted to glyphs, selected glyph pairs are
replaced by a combined glyph, the glyphs are sorted by their
`composition-order' property, spacing glyphs are inserted between
consecutive bottom glyphs, and the result is passed to
`compose-region' as the components for FROM..TO."
  (let ((final-char (char-before to))
        (ends-in-virama nil)
        (leading-r nil)
        (consonant-count 0)
        (conjuncts nil)
        (remainder nil)
        (glyphs nil)
        (ordered-glyphs nil)
        piece scan-pos cut group)
    (save-excursion
      (save-restriction
        ;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- A final virama must be preserved: it is
        ;; excluded from the conversion and re-attached below.
        (setq ends-in-virama (eq final-char ?\e$,1?M\e(B))
        (narrow-to-region from (if ends-in-virama (1- to) to))
        (goto-char (point-min))
        ;; Special rule 2. -- A preceding "r virama" must be a modifier.
        (when (looking-at "\e$,1?0?M\e(B.")
          (setq leading-r t)
          (forward-char 2))
        ;; Remove conjunct consonants: every consonant after the first
        ;; is collected as a conjunct glyph, and a virama left in the
        ;; text gathered so far is dropped.
        (while (re-search-forward knd-char-glyph-regexp nil t)
          (setq piece (match-string 0))
          (when (and (string-match kannada-consonant piece)
                     (> consonant-count 0))
            (setq conjuncts
                  (concat conjuncts
                          (gethash (match-string 0 piece)
                                   knd-conjunct-glyph-hash)))
            (setq piece (replace-match "" t nil piece))
            (when (string-match "\e$,1?M\e(B" remainder)
              (setq remainder (replace-match "" t nil remainder))))
          (setq remainder (concat remainder piece))
          ;; Count the number of consonant glyphs.
          (when (string-match kannada-consonant piece)
            (setq consonant-count (1+ consonant-count))))
        ;; Translate the remaining characters into glyphs.
        (setq scan-pos 0)
        (while (string-match knd-char-glyph-regexp remainder scan-pos)
          (setq piece (match-string 0 remainder)
                scan-pos (match-end 0)
                glyphs (concat glyphs (gethash piece knd-char-glyph-hash))))

        (when conjuncts
          (setq glyphs (concat glyphs conjuncts)))
        (if ends-in-virama
            (setq glyphs (concat glyphs "\e$,45)\e(B"))
          ;; Without a final virama, some consonants take an extra glyph.
          (goto-char (point-min))
          (when (re-search-forward kannada-consonant-needs-twirl nil t)
            (setq glyphs (concat glyphs "\e$,44z\e(B"))))
        ;; The preceding "r" must be attached.
        (when leading-r
          (setq glyphs (concat glyphs "\e$,43%\e(B")))
        ;; *** glyph-to-glyph conversion ***
        ;; Only the first matching glyph sequence is replaced.
        (when (string-match knd-glyph-glyph-regexp glyphs)
          (setq glyphs
                (replace-match (gethash (match-string 0 glyphs)
                                        knd-glyph-glyph-hash)
                               nil t glyphs)))
        ;; *** glyph reordering ***
        ;; Cut the glyph string after each virama glyph (or at its last
        ;; glyph) and sort every group by composition order.
        (while (setq cut (string-match "\e$,45)\e(B\\|.$" glyphs))
          (setq group (substring glyphs 0 (1+ cut))
                glyphs (substring glyphs (1+ cut)))
          (setq group
                (sort (string-to-list group)
                      (lambda (a b)
                        (< (get-char-code-property a 'composition-order)
                           (get-char-code-property b 'composition-order)))))
          (setq ordered-glyphs (nconc ordered-glyphs group)))
        ;; *** insert space glyphs for kerning ***
        ;; A spacing glyph goes between two consecutive glyphs whose
        ;; composition order is 5 or 6.
        (when (> consonant-count 0)
          (let ((cell ordered-glyphs)
                (prev-cell nil)
                (prev-bottom nil)
                is-bottom order)
            (while cell
              (setq order (get-char-code-property
                           (car cell) 'composition-order)
                    is-bottom (or (eq order 5) (eq order 6)))
              (when (and is-bottom prev-bottom)
                (setcdr prev-cell (cons ?\e$,44T\e(B cell)))
              (setq prev-bottom is-bottom
                    prev-cell cell
                    cell (cdr cell)))))
        ;; Concatenate and attach reference points: each glyph is
        ;; preceded by its reference point, and the leading one is
        ;; dropped.
        (setq glyphs
              (cdr
               (apply
                'nconc
                (mapcar
                 (lambda (glyph)
                   (list
                    (or (get-char-code-property glyph 'reference-point)
                        '(5 . 3)) ; default reference point.
                    glyph))
                 ordered-glyphs))))))
    (compose-region from to glyphs)))
| 541 | |
| 542 | (provide 'knd-util) |
| 543 | |
| 544 | ;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc |
| 545 | ;;; knd-util.el ends here |