* dired.c (Ffile_attributes): Fix typo in docstring.

[bpt/emacs.git] / lisp / language / indian.el
diff --git a/lisp/language/indian.el b/lisp/language/indian.el

dissimilarity index 87%

index 0fed075..3414da2 100644 (file)
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -1,179 +1,394 @@
-;;; indian.el --- Indian languages support -*- coding: iso-2022-7bit; -*-
-
-;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007
-;;   Free Software Foundation, Inc.
-;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
-;;   National Institute of Advanced Industrial Science and Technology (AIST)
-;;   Registration Number H14PRO021
-
-;; Maintainer:  KAWABATA, Taichi <kawabata@m17n.org>
-;; Keywords:   multilingual, i18n, Indian
-
-;; This file is part of GNU Emacs.
-
-;; GNU Emacs is free software; you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
-
-;; GNU Emacs is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;; GNU General Public License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING.  If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
-
-;;; Commentary:
-
-;; This file defines in-is13194 coding system and relationship between
-;; indian-glyph character-set and various CDAC fonts.
-
-;;; Code:
-
-(make-coding-system
- 'in-is13194 2 ?D
- "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
- '(ascii indian-is13194 nil nil
-   nil ascii-eol)
- `((safe-chars . ,(let ((table (make-char-table 'safe-chars nil)))
-                   (set-char-table-range table 'indian-is13194 t)
-                   (dotimes (i 127)
-                     (aset table i t)
-                     (aset table (decode-char 'ucs (+ #x900 i)) t))
-                   table))
-   (post-read-conversion . in-is13194-post-read-conversion)
-   (pre-write-conversion . in-is13194-pre-write-conversion)))
-
-(define-coding-system-alias 'devanagari 'in-is13194)
-
-(defvar indian-font-foundry 'cdac
-  "Font foundry for Indian characters.
-Currently supported foundries are `cdac' and `akruti'.")
-
-(defvar indian-script-language-alist
-  '((devanagari (hindi sanskrit) nil)
-    (bengali (bengali assamese) nil)
-    (gurmukhi (punjabi) nil)
-    (gujarati (gujarati) nil)
-    (oriya (oriya) nil)
-    (tamil (tamil) nil)
-    (telugu (telugu) nil)
-    (kannada (kannada) nil)
-    (malayalam (malayalam) nil))
-  "Alist of Indian scripts vs the corresponding language list and font foundry.
-Each element has this form:
-
-  (SCRIPT LANGUAGE-LIST FONT-FOUNDRY)
-
-SCRIPT is one of Indian script names.
-
-LANGUAGE-LIST is a list of Indian langauge names SCRIPT is used for.
-The list is in the priority order.
-
-FONT-FOUNDRY is a font foundry representing a group of Indian
-fonts.  If the value is nil, the value of `indian-font-foundry'
-is used.")
-
-(defconst indian-font-char-index-table
-  '(                                   ; for which language(s)
-    ;; CDAC fonts
-    (#x0000 . cdac:dv-ttsurekh)                ; hindi, etc
-    (#x0100 . cdac:sd-ttsurekh)                ; sanskrit
-    (#x0200 . cdac:bn-ttdurga)         ; bengali
-    (#x0300 . cdac:tm-ttvalluvar)      ; tamil
-    (#x0400 . cdac:tl-tthemalatha)     ; telugu
-    (#x0500 . cdac:as-ttdurga)         ; assamese
-    (#x0600 . cdac:or-ttsarala)                ; oriya
-    (#x0700 . cdac:kn-ttuma)           ; kannada
-    (#x0800 . cdac:ml-ttkarthika)      ; malayalam
-    (#x0900 . cdac:gj-ttavantika)      ; gujarati
-    (#x0A00 . cdac:pn-ttamar)          ; punjabi
-
-    ;; AKRUTI fonts
-    (#x0B00 . akruti:dev)              ; hindi, etc
-    (#x0C00 . akruti:bng)              ; bengali
-    (#x0D00 . akruti:pnj)              ; punjabi
-    (#x0E00 . akruti:guj)              ; gujarati
-    (#x0F00 . akruti:ori)              ; oriya
-    (#x1000 . akruti:tml)              ; tamil
-    (#x1100 . akruti:tlg)              ; telugu
-    (#x1200 . akruti:knd)              ; kannada
-    (#x1300 . akruti:mal)              ; malayalam
-    )
-  "Alist of indices of `indian-glyph' character vs Indian font identifiers.
-Each element has this form: (INDEX . FONT-IDENTIFIER)
-
-INDEX is an index number of the first character in the charset
-`indian-glyph' assigned for glyphs in the font specified by
-FONT-IDENTIFIER.  Currently FONT-IDENTIFIERs are defined for CDAC
-and AKRUTI font groups.")
-
-(defun indian-font-char (index font-identifier)
-  "Return character of charset `indian-glyph' made from glyph index INDEX.
-FONT-IDENTIFIER is an identifier of an Indian font listed in the
-variable `indian-font-char-index-table'.  It specifies which
-font INDEX is for."
-  (if (or (< index 0) (> index 255))
-      (error "Invalid glyph index: %d" index))
-  (let ((start (car (rassq font-identifier indian-font-char-index-table))))
-    (if (not start)
-       (error "Unknown font identifier: %s" font-identifier))
-    (setq index (+ start index))
-    (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32))))
-
-;; Return a range of characters (cons of min and max character) of the
-;; charset `indian-glyph' for displaying SCRIPT in LANGUAGE by a font
-;; of FOUNDRY.
-
-(defun indian-font-char-range (font-identifier)
-  (cons (indian-font-char 0 font-identifier)
-       (indian-font-char 255 font-identifier)))
-
-(defvar indian-script-table
-  '[
-    devanagari
-    sanskrit
-    bengali
-    tamil
-    telugu
-    assamese
-    oriya
-    kannada
-    malayalam
-    gujarati
-    punjabi
-    ]
-  "Vector of Indian script names.")
-
-(let ((len (length indian-script-table))
-      (i 0))
-  (while (< i len)
-    (put (aref indian-script-table i) 'indian-glyph-code-offset (* 256 i))
-    (setq i (1+ i))))
-
-(defvar indian-default-script 'devanagari
-  "Default script for Indian languages.
-Each Indian language environment sets this value
-to one of `indian-script-table' (which see).
-The default value is `devanagari'.")
-
-(define-ccl-program ccl-encode-indian-glyph-font
-  `(0
-    ;; Shorten (r1 = (((((r1 - 32) * 96) + r2) - 32) % 256))
-    (r1 = ((((r1 * 96) + r2) - ,(+ (* 32 96) 32)) % 256))))
-
-(setq font-ccl-encoder-alist
-      (cons (cons "-CDAC" 'ccl-encode-indian-glyph-font)
-           font-ccl-encoder-alist))
-
-(setq font-ccl-encoder-alist
-      (cons (cons "-AKRUTI" 'ccl-encode-indian-glyph-font)
-           font-ccl-encoder-alist))
-
-(provide 'indian)
-
-;;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f
-;;; indian.el ends here
+;;; indian.el --- Indian languages support -*- coding: utf-8; -*-
+
+;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;;   Free Software Foundation, Inc.
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;;   National Institute of Advanced Industrial Science and Technology (AIST)
+;;   Registration Number H14PRO021
+
+;; Maintainer:  Kenichi Handa <handa@m17n.org>
+;;             KAWABATA, Taichi <kawabata@m17n.org>
+;; Keywords:   multilingual, i18n, Indian
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This file contains definitions of Indian language environments, and
+;; setups for displaying the scripts used there.
+
+;;; Code:
+
+;; Define the `in-is13194-devanagari' coding system: an ISO-2022 type
+;; coding system whose first two designation slots hold `ascii' and
+;; `indian-is13194'.  Decoded text is passed to
+;; `in-is13194-post-read-conversion' and text to be encoded is passed to
+;; `in-is13194-pre-write-conversion'; both are defined outside this
+;; file.
+(define-coding-system 'in-is13194-devanagari
+  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
+  :coding-type 'iso-2022
+  :mnemonic ?D
+  :designation [ascii indian-is13194 nil nil]
+  :charset-list '(ascii indian-is13194)
+  :post-read-conversion 'in-is13194-post-read-conversion
+  :pre-write-conversion 'in-is13194-pre-write-conversion)
+
+;; `devanagari' is kept as an alias name for the coding system above.
+(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
+
+;; Define the "Devanagari" language environment: Unicode charset, UTF-8
+;; as coding system and coding priority, and "dev-aiba" as the input
+;; method.  The last argument, '("Indian"), is the list of parent menu
+;; names.
+(set-language-info-alist
+ "Devanagari" '((charset unicode)
+               (coding-system utf-8)
+               (coding-priority utf-8)
+               (input-method . "dev-aiba")
+               (documentation . "\
+Such languages using Devanagari script as Hindi and Marathi
+are supported in this language environment."))
+ '("Indian"))
+
+;; Define the "Bengali" language environment: Unicode charset, UTF-8 as
+;; coding system and coding priority, and "bengali-itrans" as the input
+;; method.  The last argument, '("Indian"), is the list of parent menu
+;; names.
+(set-language-info-alist
+ "Bengali" '((charset unicode)
+            (coding-system utf-8)
+            (coding-priority utf-8)
+            (input-method . "bengali-itrans")
+            (documentation . "\
+Such languages using Bengali script as Bengali and Assamese
+are supported in this language environment."))
+ '("Indian"))
+
+;; Define the "Punjabi" language environment: Unicode charset, UTF-8 as
+;; coding system and coding priority, and "punjabi-itrans" as the input
+;; method.  The last argument, '("Indian"), is the list of parent menu
+;; names.
+(set-language-info-alist
+ "Punjabi" '((charset unicode)
+             (coding-system utf-8)
+             (coding-priority utf-8)
+             (input-method . "punjabi-itrans")
+             (documentation . "\
+North Indian language Punjabi is supported in this language environment."))
+ '("Indian"))
+
+;; Define the "Gujarati" language environment: Unicode charset, UTF-8 as
+;; coding system and coding priority, and "gujarati-itrans" as the input
+;; method.  The last argument, '("Indian"), is the list of parent menu
+;; names.
+(set-language-info-alist
+ "Gujarati" '((charset unicode)
+             (coding-system utf-8)
+             (coding-priority utf-8)
+             (input-method . "gujarati-itrans")
+             (documentation . "\
+North Indian language Gujarati is supported in this language environment."))
+ '("Indian"))
+
+;; Define the "Oriya" language environment: Unicode charset, UTF-8 as
+;; coding system and coding priority, and "oriya-itrans" as the input
+;; method.  The last argument, '("Indian"), is the list of parent menu
+;; names.
+(set-language-info-alist
+ "Oriya" '((charset unicode)
+             (coding-system utf-8)
+             (coding-priority utf-8)
+             (input-method . "oriya-itrans")
+             (documentation . "\
+Such languages using Oriya script as Oriya, Khonti, and Santali
+are supported in this language environment."))
+ '("Indian"))
+
+;; Define the "Tamil" language environment: Unicode charset, UTF-8 as
+;; coding system and coding priority, and "tamil-itrans" as the input
+;; method.  The last argument, '("Indian"), is the list of parent menu
+;; names.
+(set-language-info-alist
+ "Tamil" '((charset unicode)
+          (coding-system utf-8)
+          (coding-priority utf-8)
+          (input-method . "tamil-itrans")
+          (documentation . "\
+South Indian Language Tamil is supported in this language environment."))
+ '("Indian"))
+
+;; Define the "Telugu" language environment: Unicode charset, UTF-8 as
+;; coding system and coding priority, and "telugu-itrans" as the input
+;; method.  The last argument, '("Indian"), is the list of parent menu
+;; names.
+(set-language-info-alist
+ "Telugu" '((charset unicode)
+           (coding-system utf-8)
+           (coding-priority utf-8)
+           (input-method . "telugu-itrans")
+           (documentation . "\
+South Indian Language Telugu is supported in this language environment."))
+ '("Indian"))
+
+;; Define the "Kannada" language environment: Unicode charset, UTF-8 as
+;; coding system and coding priority, and "kannada-itrans" as the input
+;; method.  The coding system is spelled `utf-8', the same name every
+;; other environment in this file uses, rather than `mule-utf-8'.  The
+;; last argument, '("Indian"), is the list of parent menu names.
+(set-language-info-alist
+ "Kannada" '((charset unicode)
+            (coding-system utf-8)
+            (coding-priority utf-8)
+            (input-method . "kannada-itrans")
+            (sample-text . "Kannada (ಕನ್ನಡ)  ನಮಸ್ಕಾರ")
+            (documentation . "\
+Kannada language and script is supported in this language
+environment."))
+ '("Indian"))
+
+;; Define the "Malayalam" language environment: Unicode charset, UTF-8
+;; as coding system and coding priority, and "malayalam-itrans" as the
+;; input method.  The last argument, '("Indian"), is the list of parent
+;; menu names.
+(set-language-info-alist
+ "Malayalam" '((charset unicode)
+              (coding-system utf-8)
+              (coding-priority utf-8)
+              (input-method . "malayalam-itrans")
+              (documentation . "\
+South Indian language Malayalam is supported in this language environment."))
+ '("Indian"))
+
+(defun indian-compose-regexp (regexp table)
+  "Return REGEXP with its mnemonic characters expanded according to TABLE.
+TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).  The
+entries are applied in list order, each one to the result of the
+previous substitution.  MNEMONIC-STRING is used as a regexp and is
+matched case-sensitively; REPLACEMENT-STRING is inserted literally."
+  (let ((case-fold-search nil)
+        (expanded regexp))
+    (dolist (pair table expanded)
+      (setq expanded
+            (replace-regexp-in-string (car pair) (cdr pair) expanded t t)))))
+
+(defconst devanagari-composable-pattern
+  (let* ((mnemonics
+          '(("a" . "[\u0900-\u0902]")    ; vowel modifier (above)
+            ("A" . "\u0903")             ; vowel modifier (post)
+            ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
+            ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
+            ("R" . "\u0930")             ; RA
+            ("n" . "\u093C")             ; NUKTA
+            ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign
+            ("H" . "\u094D")             ; HALANT
+            ("s" . "[\u0951-\u0952]")    ; stress sign
+            ("t" . "[\u0953-\u0954]")    ; accent
+            ("N" . "\u200C")             ; ZWNJ
+            ("J" . "\u200D")             ; ZWJ
+            ("X" . "[\u0900-\u097F]")))  ; all coverage
+         ;; Alternatives are listed in matching priority order.
+         (template
+          (concat
+           ;; 1. syllable built on an independent vowel
+           "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
+           ;; 2. syllable built on a consonant (cluster)
+           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
+           ;; 3. special consonant form: ZWJ + HALANT + RA
+           "JHR\\|"
+           ;; 4. any single character of the block
+           "X")))
+    (indian-compose-regexp template mnemonics))
+  "Regexp that matches one composable run of Devanagari characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+(defconst bengali-composable-pattern
+  (let* ((mnemonics
+          '(("a" . "\u0981")             ; SIGN CANDRABINDU
+            ("A" . "[\u0982-\u0983]")    ; SIGN ANUSVARA .. VISARGA
+            ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel
+            ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
+            ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA
+            ("R" . "[\u09B0\u09F0]")     ; RA
+            ("n" . "\u09BC")             ; NUKTA
+            ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign
+            ("H" . "\u09CD")             ; HALANT
+            ("T" . "\u09CE")             ; KHANDA TA
+            ("N" . "\u200C")             ; ZWNJ
+            ("J" . "\u200D")             ; ZWJ
+            ("X" . "[\u0980-\u09FF]")))  ; all coverage
+         ;; Alternatives are listed in matching priority order.
+         (template
+          (concat
+           ;; 1. syllable built on an independent vowel
+           "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
+           ;; 2. syllable built on a consonant (cluster)
+           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
+           ;; 3. KHANDA TA, optionally preceded by RA + HALANT
+           "\\(?:RH\\)?T\\|"
+           ;; 4. special consonant form: ZWJ + HALANT + one of BA, YA, RA
+           "JHB\\|"
+           ;; 5. any single character of the block
+           "X")))
+    (indian-compose-regexp template mnemonics))
+  "Regexp that matches one composable run of Bengali characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+(defconst gurmukhi-composable-pattern
+  (let ((table
+        ;; Each mnemonic may appear only once: `indian-compose-regexp'
+        ;; applies the entries in order, so a second entry for the same
+        ;; letter would find nothing left to replace.  TIPPI is
+        ;; therefore folded into the single "a" class.
+        '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
+          ("A" . "\u0A03")             ; SIGN VISARGA
+          ("V" . "[\u0A05-\u0A14]")    ; independent vowel
+          ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]")       ; consonant
+          ("Y" . "[\u0A2F\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
+          ("n" . "\u0A3C")             ; NUKTA
+          ("v" . "[\u0A3E-\u0A4C]")    ; vowel sign
+          ("H" . "\u0A4D")             ; VIRAMA
+          ("N" . "\u200C")             ; ZWNJ
+          ("J" . "\u200D")             ; ZWJ
+          ("X" . "[\u0A00-\u0A7F]")))) ; all coverage
+    (indian-compose-regexp
+     (concat
+      ;; consonant-based syllables, or
+      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
+      ;; syllables with an independent vowel, or
+      "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
+      ;; special consonant form, or
+      "JHY\\|"
+      ;; any other singleton characters
+      "X")
+     table))
+  "Regexp matching a composable sequence of Gurmukhi characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+(defconst gujarati-composable-pattern
+  (let ((table
+        '(("a" . "[\u0A81-\u0A82]")    ; SIGN CANDRABINDU .. ANUSVARA
+          ;; "A" must be the bare character: a trailing "]" here would
+          ;; turn the optional `A?' of the template into a mandatory
+          ;; VISARGA followed by an optional "]".
+          ("A" . "\u0A83")             ; SIGN VISARGA
+          ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel
+          ("C" . "[\u0A95-\u0AB9]")    ; consonant
+          ("R" . "\u0AB0")             ; RA
+          ("n" . "\u0ABC")             ; NUKTA
+          ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign
+          ("H" . "\u0ACD")             ; VIRAMA
+          ("N" . "\u200C")             ; ZWNJ
+          ("J" . "\u200D")             ; ZWJ
+          ("X" . "[\u0A80-\u0AFF]")))) ; all coverage
+    (indian-compose-regexp
+     (concat
+      ;; syllables with an independent vowel, or
+      "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
+      ;; consonant-based syllables, or
+      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
+      ;; special consonant form, or
+      "JHR\\|"
+      ;; any other singleton characters
+      "X")
+     table))
+  "Regexp matching a composable sequence of Gujarati characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+(defconst oriya-composable-pattern
+  (let* ((mnemonics
+          '(("a" . "\u0B01")             ; SIGN CANDRABINDU
+            ("A" . "[\u0B02-\u0B03]")    ; SIGN ANUSVARA .. VISARGA
+            ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel
+            ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant
+            ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form
+            ("R" . "\u0B30")             ; RA
+            ("n" . "\u0B3C")             ; NUKTA
+            ("v" . "[\u0B3E-\u0B44\u0B47\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign
+            ("H" . "\u0B4D")             ; VIRAMA
+            ("N" . "\u200C")             ; ZWNJ
+            ("J" . "\u200D")             ; ZWJ
+            ("X" . "[\u0B00-\u0B7F]")))  ; all coverage
+         ;; Alternatives are listed in matching priority order.
+         (template
+          (concat
+           ;; 1. syllable built on an independent vowel
+           "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
+           ;; 2. syllable built on a consonant (cluster)
+           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
+           ;; 3. special consonant form: ZWJ + VIRAMA + below-form consonant
+           "JHB\\|"
+           ;; 4. any single character of the block
+           "X")))
+    (indian-compose-regexp template mnemonics))
+  "Regexp that matches one composable run of Oriya characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+(defconst tamil-composable-pattern
+  (let* ((mnemonics
+          '(("a" . "\u0B82")             ; SIGN ANUSVARA
+            ("V" . "[\u0B85-\u0B94]")    ; independent vowel
+            ("C" . "[\u0B95-\u0BB9]")    ; consonant
+            ("v" . "[\u0BBE-\u0BC8\u0BD7]") ; vowel sign
+            ("H" . "\u0BCD")             ; VIRAMA
+            ("N" . "\u200C")             ; ZWNJ
+            ("J" . "\u200D")             ; ZWJ
+            ("X" . "[\u0B80-\u0BFF]")))  ; all coverage
+         ;; Alternatives are listed in matching priority order.
+         (template
+          (concat
+           ;; 1. syllable built on a consonant (cluster)
+           "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
+           ;; 2. syllable built on an independent vowel
+           "Vv*a?\\|"
+           ;; 3. any single character of the block
+           "X")))
+    (indian-compose-regexp template mnemonics))
+  "Regexp that matches one composable run of Tamil characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+(defconst telugu-composable-pattern
+  (let* ((mnemonics
+          '(("a" . "[\u0C01-\u0C03]")    ; SIGN CANDRABINDU .. VISARGA
+            ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
+            ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
+            ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]")  ; vowel sign
+            ("H" . "\u0C4D")             ; VIRAMA
+            ("N" . "\u200C")             ; ZWNJ
+            ("J" . "\u200D")             ; ZWJ
+            ("X" . "[\u0C00-\u0C7F]")))  ; all coverage
+         ;; Alternatives are listed in matching priority order.
+         (template
+          (concat
+           ;; 1. syllable built on a consonant (cluster)
+           "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
+           ;; 2. syllable built on an independent vowel
+           "V\\(?:J?HC\\)?v*a?\\|"
+           ;; 3. special consonant form: ZWJ + VIRAMA + consonant
+           "JHC\\|"
+           ;; 4. any single character of the block
+           "X")))
+    (indian-compose-regexp template mnemonics))
+  "Regexp that matches one composable run of Telugu characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+(defconst kannada-composable-pattern
+  (let* ((mnemonics
+          '(("A" . "[\u0C82-\u0C83]")    ; SIGN ANUSVARA .. VISARGA
+            ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
+            ("C" . "[\u0C95-\u0CB9\u0CDE]")        ; consonant
+            ("R" . "\u0CB0")             ; RA
+            ("n" . "\u0CBC")             ; NUKTA
+            ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
+            ("H" . "\u0CCD")             ; VIRAMA
+            ("N" . "\u200C")             ; ZWNJ
+            ("J" . "\u200D")             ; ZWJ
+            ("X" . "[\u0C80-\u0CFF]")))  ; all coverage
+         ;; Alternatives are listed in matching priority order.
+         (template
+          (concat
+           ;; 1. syllable built on an independent vowel
+           "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
+           ;; 2. syllable built on a consonant (cluster)
+           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
+           ;; 3. special consonant form: ZWJ + VIRAMA + consonant
+           "JHC\\|"
+           ;; 4. any single character of the block
+           "X")))
+    (indian-compose-regexp template mnemonics))
+  "Regexp that matches one composable run of Kannada characters.
+The value is a template of single-letter mnemonics expanded by
+`indian-compose-regexp'.")
+
+;; The value is built by `indian-compose-regexp': each single-letter
+;; mnemonic in the concatenated template is replaced by the character
+;; or character class that TABLE associates with it.
+(defconst malayalam-composable-pattern
+  (let ((table
+        '(("A" . "[\u0D02-\u0D03]")    ; SIGN ANUSVARA .. VISARGA
+          ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
+          ("C" . "[\u0D15-\u0D39]")              ; consonant
+          ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]")  ; YA, RA, LA, VA
+          ("v" . "[\u0D3E-\u0D48\u0D57\u0D62-\u0D63]") ; postbase matra
+          ("H" . "\u0D4D")                       ; SIGN VIRAMA
+          ("N" . "\u200C")                       ; ZWNJ
+          ("J" . "\u200D")                       ; ZWJ
+          ("X" . "[\u0D00-\u0D7F]"))))           ; all coverage
+    (indian-compose-regexp
+     (concat
+      ;; consonant-based syllables, or
+      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
+      ;; syllables with an independent vowel, or
+      ;; NOTE(review): `v*?' expands to a non-greedy repetition, whereas
+      ;; the other scripts' patterns in this file use `v*' or `v?' here.
+      ;; Confirm whether the non-greedy form is intended.
+      "V\\(?:J?HY\\)?v*?A?\\|"
+      ;; special consonant form (ZWJ + VIRAMA + one of YA, RA, LA, VA), or
+      "JHY\\|"
+      ;; any other singleton characters
+      "X")
+     table))
+  "Regexp matching a composable sequence of Malayalam characters.")
+
+;; Install the composable patterns.  For every entry of
+;; `char-script-table' whose script has a pattern defined in this file,
+;; store a single composition rule [PATTERN 0 font-shape-gstring] in
+;; `composition-function-table' for the same key.
+(let ((pattern-by-script
+       (list (cons 'devanagari devanagari-composable-pattern)
+             (cons 'bengali bengali-composable-pattern)
+             (cons 'gurmukhi gurmukhi-composable-pattern)
+             (cons 'gujarati gujarati-composable-pattern)
+             (cons 'oriya oriya-composable-pattern)
+             (cons 'tamil tamil-composable-pattern)
+             (cons 'telugu telugu-composable-pattern)
+             (cons 'kannada kannada-composable-pattern)
+             (cons 'malayalam malayalam-composable-pattern))))
+  (map-char-table
+   (lambda (range script)
+     (let ((entry (assq script pattern-by-script)))
+       ;; Scripts without a pattern are left untouched.
+       (when entry
+         (set-char-table-range
+          composition-function-table range
+          (list (vector (cdr entry) 0 'font-shape-gstring))))))
+   char-script-table))
+
+(provide 'indian)
+
+;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f
+;;; indian.el ends here