| 1 | ;;; lao-util.el --- utilities for Lao -*- coding: iso-2022-7bit; -*- |
| 2 | |
| 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
| 4 | ;; Free Software Foundation, Inc. |
| 5 | ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, |
| 6 | ;; 2007, 2008 |
| 7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 8 | ;; Registration Number H14PRO021 |
| 9 | ;; Copyright (C) 2003 |
| 10 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 11 | ;; Registration Number H13PRO009 |
| 12 | |
| 13 | ;; Keywords: multilingual, Lao, i18n |
| 14 | |
| 15 | ;; This file is part of GNU Emacs. |
| 16 | |
| 17 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 18 | ;; it under the terms of the GNU General Public License as published by |
| 19 | ;; the Free Software Foundation; either version 3, or (at your option) |
| 20 | ;; any later version. |
| 21 | |
| 22 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 23 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 24 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 25 | ;; GNU General Public License for more details. |
| 26 | |
| 27 | ;; You should have received a copy of the GNU General Public License |
| 28 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 29 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 30 | ;; Boston, MA 02110-1301, USA. |
| 31 | |
| 32 | ;;; Commentary: |
| 33 | |
| 34 | ;;; Code: |
| 35 | |
;; Category table and per-character properties for Lao characters.

(defconst lao-category-table (make-category-table)
  "Category table holding the Lao character categories.
It defines the categories `c' (consonant), `s' (semi-vowel),
`v' (upper/lower vowel) and `t' (tone).")

;; Register the four category mnemonics in `lao-category-table'.
(dolist (category '((?c . "Lao consonant")
                    (?s . "Lao semi-vowel")
                    (?v . "Lao upper/lower vowel")
                    (?t . "Lao tone")))
  (define-category (car category) (cdr category) lao-category-table))
| 43 | |
;; Register every position of the Lao charset, in code order.  Each
;; entry is (CHAR PHONETIC-TYPE NAME MEANING); MEANING is given only
;; for consonants.  For each entry the character is added to the
;; matching category of `lao-category-table' (if its type has one) and
;; its `phonetic-type', `name' and `meaning' char-code properties are
;; set.
(dolist (elm '((?\e(1!\e(B consonant "LETTER KOR KAI'" "CHICKEN")
               (?\e(1"\e(B consonant "LETTER KHOR KHAI'" "EGG")
               (?\e(1#\e(B invalid nil)
               (?\e(1$\e(B consonant "LETTER QHOR QHWARGN" "BUFFALO")
               (?\e(1%\e(B invalid nil)
               (?\e(1&\e(B invalid nil)
               (?\e(1'\e(B consonant "LETTER NGOR NGUU" "SNAKE")
               (?\e(1(\e(B consonant "LETTER JOR JUA" "BUDDHIST NOVICE")
               (?\e(1)\e(B invalid nil)
               (?\e(1*\e(B consonant "LETTER XOR X\"ARNG" "ELEPHANT")
               (?\e(1+\e(B invalid nil)
               (?\e(1,\e(B invalid nil)
               (?\e(1-\e(B consonant "LETTER YOR YUNG" "MOSQUITO")
               (?\e(1.\e(B invalid nil)
               (?\e(1/\e(B invalid nil)
               (?\e(10\e(B invalid nil)
               (?\e(11\e(B invalid nil)
               (?\e(12\e(B invalid nil)
               (?\e(13\e(B invalid nil)
               (?\e(14\e(B consonant "LETTER DOR DANG" "NOSE")
               (?\e(15\e(B consonant "LETTER TOR TAR" "EYE")
               (?\e(16\e(B consonant "LETTER THOR THUNG" "TO ASK,QUESTION")
               (?\e(17\e(B consonant "LETTER DHOR DHARM" "FLAG")
               (?\e(18\e(B invalid nil)
               (?\e(19\e(B consonant "LETTER NOR NOK" "BIRD")
               (?\e(1:\e(B consonant "LETTER BOR BED" "FISHHOOK")
               (?\e(1;\e(B consonant "LETTER POR PAR" "FISH")
               (?\e(1<\e(B consonant "LETTER HPOR HPER\"" "BEE")
               (?\e(1=\e(B consonant "LETTER FHOR FHAR" "WALL")
               (?\e(1>\e(B consonant "LETTER PHOR PHUU" "MOUNTAIN")
               (?\e(1?\e(B consonant "LETTER FOR FAI" "FIRE")
               (?\e(1@\e(B invalid nil)
               (?\e(1A\e(B consonant "LETTER MOR MAR\"" "HORSE")
               (?\e(1B\e(B consonant "LETTER GNOR GNAR" "MEDICINE")
               (?\e(1C\e(B consonant "LETTER ROR ROD" "CAR")
               (?\e(1D\e(B invalid nil)
               (?\e(1E\e(B consonant "LETTER LOR LIING" "MONKEY")
               (?\e(1F\e(B invalid nil)
               (?\e(1G\e(B consonant "LETTER WOR WII" "HAND FAN")
               (?\e(1H\e(B invalid nil)
               (?\e(1I\e(B invalid nil)
               (?\e(1J\e(B consonant "LETTER SOR SEA" "TIGER")
               (?\e(1K\e(B consonant "LETTER HHOR HHAI" "JAR")
               (?\e(1L\e(B invalid nil)
               (?\e(1M\e(B consonant "LETTER OR OOW" "TAKE")
               (?\e(1N\e(B consonant "LETTER HOR HEA" "BOAT")
               (?\e(1O\e(B special "ELLIPSIS")
               (?\e(1P\e(B vowel-base "VOWEL SIGN SARA A")
               (?\e(1Q\e(B vowel-upper "VOWEL SIGN MAI KAN")
               (?\e(1R\e(B vowel-base "VOWEL SIGN SARA AR")
               (?\e(1S\e(B vowel-base "VOWEL SIGN SARA AM")
               (?\e(1T\e(B vowel-upper "VOWEL SIGN SARA I")
               (?\e(1U\e(B vowel-upper "VOWEL SIGN SARA II")
               (?\e(1V\e(B vowel-upper "VOWEL SIGN SARA EU")
               (?\e(1W\e(B vowel-upper "VOWEL SIGN SARA UR")
               (?\e(1X\e(B vowel-lower "VOWEL SIGN SARA U")
               (?\e(1Y\e(B vowel-lower "VOWEL SIGN SARA UU")
               (?\e(1Z\e(B invalid nil)
               (?\e(1[\e(B vowel-upper "VOWEL SIGN MAI KONG")
               (?\e(1\\e(B semivowel-lower "SEMIVOWEL SIGN LO")
               (?\e(1]\e(B vowel-base "SEMIVOWEL SIGN SARA IA")
               (?\e(1^\e(B invalid nil)
               (?\e(1_\e(B invalid nil)
               (?\e(1`\e(B vowel-base "VOWEL SIGN SARA EE")
               (?\e(1a\e(B vowel-base "VOWEL SIGN SARA AA")
               (?\e(1b\e(B vowel-base "VOWEL SIGN SARA OO")
               (?\e(1c\e(B vowel-base "VOWEL SIGN SARA EI MAI MUAN\"")
               (?\e(1d\e(B vowel-base "VOWEL SIGN SARA AI MAI MAY")
               (?\e(1e\e(B invalid nil)
               (?\e(1f\e(B special "KO LA (REPETITION)")
               (?\e(1g\e(B invalid nil)
               (?\e(1h\e(B tone "TONE MAI EK")
               (?\e(1i\e(B tone "TONE MAI THO")
               (?\e(1j\e(B tone "TONE MAI TI")
               (?\e(1k\e(B tone "TONE MAI JADTAWAR")
               (?\e(1l\e(B tone "CANCELLATION MARK")
               (?\e(1m\e(B vowel-upper "VOWEL SIGN SARA OR")
               (?\e(1n\e(B invalid nil)
               (?\e(1o\e(B invalid nil)
               (?\e(1p\e(B special "DIGIT ZERO")
               (?\e(1q\e(B special "DIGIT ONE")
               (?\e(1r\e(B special "DIGIT TWO")
               (?\e(1s\e(B special "DIGIT THREE")
               (?\e(1t\e(B special "DIGIT FOUR")
               (?\e(1u\e(B special "DIGIT FIVE")
               (?\e(1v\e(B special "DIGIT SIX")
               (?\e(1w\e(B special "DIGIT SEVEN")
               (?\e(1x\e(B special "DIGIT EIGHT")
               (?\e(1y\e(B special "DIGIT NINE")
               (?\e(1z\e(B invalid nil)
               (?\e(1{\e(B invalid nil)
               (?\e(1|\e(B consonant "LETTER NHOR NHUU" "MOUSE")
               (?\e(1}\e(B consonant "LETTER MHOR MHAR" "DOG")
               (?\e(1~\e(B invalid nil)))
  (let* ((char (car elm))
         (ptype (nth 1 elm))
         ;; Phonetic types that have a category; `vowel-base', `special'
         ;; and `invalid' characters are given none.
         (category (cdr (assq ptype '((consonant . ?c)
                                      (vowel-upper . ?v)
                                      (vowel-lower . ?v)
                                      (semivowel-lower . ?s)
                                      (tone . ?t))))))
    (if category
        (modify-category-entry char category lao-category-table))
    (put-char-code-property char 'phonetic-type ptype)
    (put-char-code-property char 'name (nth 2 elm))
    (put-char-code-property char 'meaning (nth 3 elm))))
| 154 | |
;; The general composing rules are as follows:
;;
;;                          T
;;       V        T         V                  T
;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
;;                                   v         v
;;
;;                             T
;;        V         T          V                    T
;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CsvT -> C
;;        s         s          s         s          s
;;                                       v          v
;;
;; where C: consonant, V: vowel upper, v: vowel lower,
;;       T: tone mark, s: semivowel lower

(defvar lao-composition-pattern
  "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
  "Regular expression matching a Lao composite sequence.
The sequence is a consonant followed by a tone, by a vowel with an
optional tone, or by a semi-vowel that may itself be followed by a
tone or by a vowel with an optional tone.  The category letters used
are those defined in `lao-category-table', so that table must be the
current category table while matching.")
| 174 | |
;;;###autoload
(defun lao-compose-string (str)
  "Compose each Lao composite sequence found in STR, and return STR.
Sequences are located with `lao-composition-pattern' while
`lao-category-table' is the current category table.  STR itself is
modified by `compose-string'."
  (with-category-table lao-category-table
    (let ((start 0)
          end)
      ;; Resume each search right after the previously composed sequence.
      (while (setq start (string-match lao-composition-pattern str start))
        (setq end (match-end 0))
        (compose-string str start end)
        (setq start end)))
    str))
| 183 | |
| 184 | ;;; LRT: Lao <-> Roman Transcription |
| 185 | |
| 186 | ;; Upper vowels and tone-marks are put on the letter. |
| 187 | ;; Semi-vowel-sign-lo and lower vowels are put under the letter. |
| 188 | |
(defconst lao-transcription-consonant-alist
  ;; `sort' is destructive, so sort a copy instead of the quoted
  ;; literal itself.
  (sort (copy-sequence
         '(;; single consonants
           ("k" . "\e(1!\e(B")
           ("kh" . "\e(1"\e(B")
           ("qh" . "\e(1$\e(B")
           ("ng" . "\e(1'\e(B")
           ("j" . "\e(1(\e(B")
           ("s" . "\e(1J\e(B")
           ("x" . "\e(1*\e(B")
           ("y" . "\e(1-\e(B")
           ("d" . "\e(14\e(B")
           ("t" . "\e(15\e(B")
           ("th" . "\e(16\e(B")
           ("dh" . "\e(17\e(B")
           ("n" . "\e(19\e(B")
           ("b" . "\e(1:\e(B")
           ("p" . "\e(1;\e(B")
           ("hp" . "\e(1<\e(B")
           ("fh" . "\e(1=\e(B")
           ("ph" . "\e(1>\e(B")
           ("f" . "\e(1?\e(B")
           ("m" . "\e(1A\e(B")
           ("gn" . "\e(1B\e(B")
           ("l" . "\e(1E\e(B")
           ("r" . "\e(1C\e(B")
           ("v" . "\e(1G\e(B")
           ("w" . "\e(1G\e(B")
           ("hh" . "\e(1K\e(B")
           ("O" . "\e(1M\e(B")
           ("h" . "\e(1N\e(B")
           ("nh" . "\e(1|\e(B")
           ("mh" . "\e(1}\e(B")
           ;; double consonants (multi-character values are wrapped in
           ;; a one-element vector)
           ("lh" . ["\e(1K\\e(B"])
           ("ngh" . ["\e(1K'\e(B"])
           ("yh" . ["\e(1K]\e(B"])
           ("wh" . ["\e(1KG\e(B"])
           ("hl" . ["\e(1KE\e(B"])
           ("hy" . ["\e(1K-\e(B"])
           ("hn" . ["\e(1K9\e(B"])
           ("hm" . ["\e(1KA\e(B"])))
        (lambda (x y) (> (length (car x)) (length (car y)))))
  "Alist of Roman transcriptions vs the corresponding Lao consonants.
Each value is either a Lao string or a one-element vector holding a
Lao string.  The elements are sorted so that longer transcriptions
come first.")
| 232 | |
(defconst lao-transcription-semi-vowel-alist
  (list (cons "r" "\e(1\\e(B"))
  "Alist of Roman transcriptions vs the corresponding Lao semi-vowel signs.")
| 235 | |
(defconst lao-transcription-vowel-alist
  ;; `sort' is destructive, so sort a copy instead of the quoted
  ;; literal itself.
  (sort (copy-sequence
         '(("a" . "\e(1P\e(B")
           ("ar" . "\e(1R\e(B")
           ("i" . "\e(1T\e(B")
           ("ii" . "\e(1U\e(B")
           ("eu" . "\e(1V\e(B")
           ("ur" . "\e(1W\e(B")
           ("u" . "\e(1X\e(B")
           ("uu" . "\e(1Y\e(B")
           ("e" . ["\e(1`P\e(B"])
           ("ee" . "\e(1`\e(B")
           ("ae" . ["\e(1aP\e(B"])
           ("aa" . "\e(1a\e(B")
           ("o" . ["\e(1bP\e(B"])
           ("oo" . "\e(1b\e(B")
           ("oe" . ["\e(1`RP\e(B"])
           ("or" . "\e(1m\e(B")
           ("er" . ["\e(1`T\e(B"])
           ("ir" . ["\e(1`U\e(B"])
           ("ua" . ["\e(1[GP\e(B"])
           ("uaa" . ["\e(1[G\e(B"])
           ("ie" . ["\e(1`Q]P\e(B"])
           ("ia" . ["\e(1`Q]\e(B"])
           ("ea" . ["\e(1`VM\e(B"])
           ("eaa" . ["\e(1`WM\e(B"])
           ("ai" . "\e(1d\e(B")
           ("ei" . "\e(1c\e(B")
           ("ao" . ["\e(1`[R\e(B"])
           ("aM" . "\e(1S\e(B")))
        (lambda (x y) (> (length (car x)) (length (car y)))))
  "Alist of Roman transcriptions vs the corresponding Lao vowels.
Each value is either a Lao string or a one-element vector holding a
Lao string; the string is looked up in `lao-vowel-reordering-rule'.
The elements are sorted so that longer transcriptions come first.")
| 266 | |
;; A maa-sakod is placed at the tail of the syllable.
(defconst lao-transcription-maa-sakod-alist
  '(("k" . "\e(1!\e(B")
    ("g" . "\e(1'\e(B")
    ("y" . "\e(1-\e(B")
    ("d" . "\e(14\e(B")
    ("n" . "\e(19\e(B")
    ("b" . "\e(1:\e(B")
    ("m" . "\e(1A\e(B")
    ("v" . "\e(1G\e(B")
    ("w" . "\e(1G\e(B"))
  "Alist of Roman transcriptions vs the corresponding Lao maa-sakod characters.")
| 279 | |
(defconst lao-transcription-tone-alist
  (list (cons "'" "\e(1h\e(B")
        (cons "\"" "\e(1i\e(B")
        (cons "^" "\e(1j\e(B")
        (cons "+" "\e(1k\e(B")
        (cons "~" "\e(1l\e(B"))
  "Alist of Roman transcriptions vs the corresponding Lao tone marks.")
| 286 | |
(defconst lao-transcription-punctuation-alist
  '(;; digits
    ("\\0" . "\e(1p\e(B")
    ("\\1" . "\e(1q\e(B")
    ("\\2" . "\e(1r\e(B")
    ("\\3" . "\e(1s\e(B")
    ("\\4" . "\e(1t\e(B")
    ("\\5" . "\e(1u\e(B")
    ("\\6" . "\e(1v\e(B")
    ("\\7" . "\e(1w\e(B")
    ("\\8" . "\e(1x\e(B")
    ("\\9" . "\e(1y\e(B")
    ;; special signs
    ("\\\\" . "\e(1f\e(B")
    ("\\$" . "\e(1O\e(B"))
  "Alist of Roman transcriptions vs the corresponding Lao digits and signs.
Every transcription is a backslash followed by one character.")
| 300 | |
(defconst lao-transcription-pattern
  (let ((consonant
         (regexp-opt (mapcar 'car lao-transcription-consonant-alist)))
        (semi-vowel
         (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist)))
        (vowel
         (regexp-opt (mapcar 'car lao-transcription-vowel-alist)))
        (maa-sakod
         (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist)))
        (tone
         (regexp-opt (mapcar 'car lao-transcription-tone-alist)))
        (punctuation
         (regexp-opt (mapcar 'car lao-transcription-punctuation-alist))))
    ;; The capturing groups are exactly the six written out below;
    ;; `regexp-opt' is called without its PAREN argument.
    (concat "\\(" consonant "\\)"       ; 1: consonant
            "\\(" semi-vowel "\\)?"     ; 2: semi-vowel
            "\\("                       ; 3: vowel, maa-sakod and tone
            "\\(" vowel "\\)"           ; 4: vowel
            "\\(" maa-sakod "\\)?"      ; 5: maa-sakod
            "\\(" tone "\\)?"           ; 6: tone
            "\\)?"
            "\\|" punctuation))
  "Regexp of Roman transcription pattern for one Lao syllable.
It matches either a consonant, optionally followed by a semi-vowel
and then optionally by a vowel with an optional maa-sakod and an
optional tone, or one of the transcriptions listed in
`lao-transcription-punctuation-alist'.")
| 336 | |
(defconst lao-vowel-reordering-rule
  '(("\e(1P\e(B" (0 ?\e(1P\e(B) (0 ?\e(1Q\e(B))
    ("\e(1R\e(B" (0 ?\e(1R\e(B))
    ;; The short vowel SARA I maps to itself, like every other
    ;; single upper or lower vowel in this table.
    ("\e(1T\e(B" (0 ?\e(1T\e(B))
    ("\e(1U\e(B" (0 ?\e(1U\e(B))
    ("\e(1V\e(B" (0 ?\e(1V\e(B))
    ("\e(1W\e(B" (0 ?\e(1W\e(B))
    ("\e(1X\e(B" (0 ?\e(1X\e(B))
    ("\e(1Y\e(B" (0 ?\e(1Y\e(B))
    ("\e(1`P\e(B" (?\e(1`\e(B 0 ?\e(1P\e(B) (?\e(1`\e(B 0 ?\e(1Q\e(B))
    ("\e(1`\e(B" (?\e(1`\e(B 0))
    ("\e(1aP\e(B" (?\e(1a\e(B 0 ?\e(1P\e(B) (?\e(1a\e(B 0 ?\e(1Q\e(B))
    ("\e(1a\e(B" (?\e(1a\e(B 0))
    ("\e(1bP\e(B" (?\e(1b\e(B 0 ?\e(1P\e(B) (0 ?\e(1[\e(B) (?\e(1-\e(B ?\e(1b\e(B 0 ?\e(1Q\e(B) (?\e(1G\e(B ?\e(1b\e(B 0 ?\e(1Q\e(B))
    ("\e(1b\e(B" (?\e(1b\e(B 0))
    ("\e(1`RP\e(B" (?\e(1`\e(B 0 ?\e(1R\e(B ?\e(1P\e(B) (0 ?\e(1Q\e(B ?\e(1M\e(B))
    ("\e(1m\e(B" (0 ?\e(1m\e(B) (0 ?\e(1M\e(B))
    ("\e(1`T\e(B" (?\e(1`\e(B 0 ?\e(1T\e(B))
    ("\e(1`U\e(B" (?\e(1`\e(B 0 ?\e(1U\e(B))
    ("\e(1[GP\e(B" (0 ?\e(1[\e(B ?\e(1G\e(B ?\e(1P\e(B) (0 ?\e(1Q\e(B ?\e(1G\e(B))
    ("\e(1[G\e(B" (0 ?\e(1[\e(B ?\e(1G\e(B) (0 ?\e(1G\e(B))
    ("\e(1`Q]P\e(B" (?\e(1`\e(B 0 ?\e(1Q\e(B ?\e(1]\e(B ?\e(1P\e(B) (0 ?\e(1Q\e(B ?\e(1]\e(B))
    ("\e(1`Q]\e(B" (?\e(1`\e(B 0 ?\e(1Q\e(B ?\e(1]\e(B) (0 ?\e(1]\e(B))
    ("\e(1`VM\e(B" (?\e(1`\e(B 0 ?\e(1V\e(B ?\e(1M\e(B))
    ("\e(1`WM\e(B" (?\e(1`\e(B 0 ?\e(1W\e(B ?\e(1M\e(B))
    ("\e(1d\e(B" (?\e(1d\e(B 0))
    ("\e(1c\e(B" (?\e(1c\e(B 0))
    ("\e(1`[R\e(B" (?\e(1`\e(B 0 ?\e(1[\e(B ?\e(1R\e(B))
    ("\e(1S\e(B" (0 ?\e(1S\e(B)))
  "Alist of Lao vowel string vs the corresponding re-ordering rule.
Each element has this form:
  (VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 . RULE-0) ...)

VOWEL is a vowel string (e.g. \"\e(1`Q]P\e(B\").

NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
consonant.  It is a list of vowel characters or 0.  The element 0
indicates the place to embed a consonant.

Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
following a consonant and preceding a maa-sakod character.  If it is
nil, NO-MAA-SAKOD-RULE is used.  The maa-sakod character is always
appended at the tail.

For instance, rule `(\"\e(1`WM\e(B\" (?\e(1`\e(B 0 ?\e(1W\e(B ?\e(1M\e(B))' tells that this vowel
string following a consonant `\e(1!\e(B' should be re-ordered as \"\e(1`!WM\e(B\".

Optional (MAA-SAKOD-n . RULE-n) are rules specially applied to maa-sakod
character MAA-SAKOD-n.")
| 386 | |
;;;###autoload
(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
  "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
Only the first syllable is transcribed.
The value has the form: (START END LAO-STRING), where
START and END are the beginning and end positions of the Roman Lao syllable,
LAO-STRING is the Lao character transcription of it.
The value is nil if no Roman Lao syllable is found.

Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
syllable.  In that case, FROM and TO are indexes to STR."
  ;; Locate the first syllable.  Afterwards FROM is its start, or nil
  ;; if there is none, and the match data describes the syllable.
  (if str
      (if (setq from (string-match lao-transcription-pattern str from))
          (progn
            (if (>= from to)
                (setq from nil)
              (setq to (match-end 0)))))
    (save-excursion
      (goto-char from)
      (if (setq to (re-search-forward lao-transcription-pattern to t))
          (setq from (match-beginning 0))
        (setq from nil))))
  (if from
      ;; `lao-transcription-pattern' has six capturing groups:
      ;; 1 consonant, 2 semi-vowel, 3 vowel+maa-sakod+tone, 4 vowel,
      ;; 5 maa-sakod, 6 tone.  The `regexp-opt' calls that build it pass
      ;; no PAREN argument and so add no numbered groups.
      (let* ((consonant (match-string 1 str))
             (semivowel (match-string 2 str))
             (vowel (match-string 4 str))
             (maa-sakod (match-string 5 str))
             (tone (match-string 6 str))
             lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone
             clen cidx)
        (setq to (match-end 0))
        (if (not consonant)
            ;; No consonant: the punctuation alternative matched.
            (setq str (cdr (assoc (match-string 0 str)
                                  lao-transcription-punctuation-alist)))
          (setq lao-consonant
                (cdr (assoc consonant lao-transcription-consonant-alist)))
          (if (vectorp lao-consonant)
              (setq lao-consonant (aref lao-consonant 0)))
          (setq clen (length lao-consonant))
          (if semivowel
              ;; Include semivowel in STR, right after the first
              ;; consonant character.
              (setq lao-semivowel
                    (cdr (assoc semivowel lao-transcription-semi-vowel-alist))
                    str (if (= clen 1)
                            (concat lao-consonant lao-semivowel)
                          (concat (substring lao-consonant 0 1) lao-semivowel
                                  (substring lao-consonant 1))))
            (setq str lao-consonant))
          (if vowel
              (let (rule)
                (setq lao-vowel
                      (cdr (assoc vowel lao-transcription-vowel-alist)))
                (if (vectorp lao-vowel)
                    (setq lao-vowel (aref lao-vowel 0)))
                (setq rule (assoc lao-vowel lao-vowel-reordering-rule))
                ;; Pick the re-ordering rule: the one specific to this
                ;; maa-sakod, else the generic with-maa-sakod rule,
                ;; else the no-maa-sakod rule.
                (if (null maa-sakod)
                    (setq rule (nth 1 rule))
                  (setq lao-maa-sakod
                        (cdr (assoc maa-sakod lao-transcription-maa-sakod-alist))
                        rule
                        (or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule)))
                            (nth 2 rule)
                            (nth 1 rule))))
                (or rule
                    (error "Lao vowel %S has no re-ordering rule" lao-vowel))
                ;; Rebuild STR from the rule; the element 0 stands for
                ;; the consonant (with semi-vowel) collected so far.
                (setq lao-consonant str str "")
                (while rule
                  (if (= (car rule) 0)
                      (setq str (concat str lao-consonant)
                            cidx (length str))
                    (setq str (concat str (list (car rule)))))
                  (setq rule (cdr rule)))
                (or cidx
                    (error "Lao vowel %S has malformed re-ordering rule" vowel))
                ;; Set CIDX to after upper or lower vowel if any.
                (let ((len (length str)))
                  (while (and (< cidx len)
                              (memq (get-char-code-property (aref str cidx)
                                                            'phonetic-type)
                                    '(vowel-lower vowel-upper)))
                    (setq cidx (1+ cidx))))
                (if lao-maa-sakod
                    (setq str (concat str lao-maa-sakod)))
                ;; The tone mark goes at CIDX, not at the tail.
                (if tone
                    (setq lao-tone
                          (cdr (assoc tone lao-transcription-tone-alist))
                          str (concat (substring str 0 cidx) lao-tone
                                      (substring str cidx)))))))
        (list from to (lao-compose-string str)))))
| 475 | |
;;;###autoload
(defun lao-transcribe-roman-to-lao-string (str)
  "Transcribe Romanized Lao string STR to Lao character string.
Parts of STR that are not recognized as Roman Lao syllables are
copied to the result unchanged."
  (let ((pos 0)
        (limit (length str))
        (pieces nil)
        syllable)
    (while (setq syllable
                 (lao-transcribe-single-roman-syllable-to-lao pos limit str))
      ;; Keep the untranscribed text (possibly empty) that precedes
      ;; this syllable, then the transcription itself.
      (push (substring str pos (car syllable)) pieces)
      (push (nth 2 syllable) pieces)
      (setq pos (nth 1 syllable)))
    ;; Whatever follows the last transcribed syllable is kept as is.
    (push (substring str pos limit) pieces)
    (apply 'concat (nreverse pieces))))
| 494 | |
;;;###autoload
(defun lao-composition-function (pos &optional string)
  "Compose the Lao sequence starting one character before POS.
If STRING is non-nil, POS is an index into STRING and the sequence is
composed there; otherwise POS is a position in the current buffer.
Return the end of the composed sequence, or nil if no sequence
matching `lao-composition-pattern' starts at that place."
  (let ((start (1- pos)))
    (with-category-table lao-category-table
      (cond (string
             (when (and (>= start 0)
                        (eq (string-match lao-composition-pattern string start)
                            start))
               (let ((end (match-end 0)))
                 (compose-string string start end)
                 end)))
            ((>= start (point-min))
             (save-excursion
               (goto-char start)
               (when (looking-at lao-composition-pattern)
                 (let ((end (match-end 0)))
                   (compose-region start end)
                   end))))))))
| 510 | |
;;;###autoload
(defun lao-compose-region (from to)
  "Compose the Lao composite sequences in the region between FROM and TO.
When called interactively, operate on the current region.  Sequences
are found with `lao-composition-pattern'."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (with-category-table lao-category-table
      (let (end)
        ;; `re-search-forward' returns the end of each match.
        (while (setq end (re-search-forward lao-composition-pattern nil t))
          (compose-region (match-beginning 0) end))))))
| 520 | |
;; Announce the `lao-util' feature.
(provide 'lao-util)
| 523 | |
| 524 | ;;; arch-tag: 1f828781-3cb8-4695-88af-8f33222338ce |
| 525 | ;;; lao-util.el ends here |