| 1 | ;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*- |
| 2 | |
| 3 | ;; Copyright (C) 2001-2014 Free Software Foundation, Inc. |
| 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 7 | ;; Registration Number H14PRO021 |
| 8 | |
| 9 | ;; Copyright (C) 2003 |
| 10 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 11 | ;; Registration Number H13PRO009 |
| 12 | |
| 13 | ;; Keywords: multilingual, Hebrew |
| 14 | |
| 15 | ;; This file is part of GNU Emacs. |
| 16 | |
| 17 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 18 | ;; it under the terms of the GNU General Public License as published by |
| 19 | ;; the Free Software Foundation, either version 3 of the License, or |
| 20 | ;; (at your option) any later version. |
| 21 | |
| 22 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 23 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 24 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 25 | ;; GNU General Public License for more details. |
| 26 | |
| 27 | ;; You should have received a copy of the GNU General Public License |
| 28 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 29 | |
| 30 | ;;; Commentary: |
| 31 | |
| 32 | ;; For Hebrew, the character set ISO8859-8 is supported. |
| 33 | ;; See http://www.ecma.ch/ecma1/STAND/ECMA-121.HTM. |
| 34 | ;; Windows-1255 is also supported. |
| 35 | |
| 36 | ;;; Code: |
| 37 | |
| 38 | (define-coding-system 'hebrew-iso-8bit |
| 39 | "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)." |
| 40 | :coding-type 'charset |
| 41 | :mnemonic ?8 |
| 42 | :charset-list '(iso-8859-8) |
| 43 | :mime-charset 'iso-8859-8) |
| 44 | |
| 45 | (define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit) |
| 46 | |
| 47 | ;; These are for Explicit and Implicit directionality information, as |
| 48 | ;; defined in RFC 1556. |
| 49 | (define-coding-system-alias 'iso-8859-8-e 'hebrew-iso-8bit) |
| 50 | (define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit) |
| 51 | |
| 52 | (set-language-info-alist |
| 53 | "Hebrew" '((tutorial . "TUTORIAL.he") |
| 54 | (charset iso-8859-8) |
| 55 | (coding-priority hebrew-iso-8bit) |
| 56 | (coding-system hebrew-iso-8bit windows-1255 cp862) |
| 57 | (nonascii-translation . iso-8859-8) |
| 58 | (input-method . "hebrew") |
| 59 | (unibyte-display . hebrew-iso-8bit) |
| 60 | (sample-text . "Hebrew שלום") |
| 61 | (documentation . "Bidirectional editing is supported."))) |
| 62 | |
| 63 | (set-language-info-alist |
| 64 | "Windows-1255" '((coding-priority windows-1255) |
| 65 | (coding-system windows-1255) |
| 66 | (documentation . "\ |
| 67 | Support for Windows-1255 encoding, e.g. for Yiddish. |
| 68 | Bidirectional editing is supported."))) |
| 69 | |
| 70 | (define-coding-system 'windows-1255 |
| 71 | "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)" |
| 72 | :coding-type 'charset |
| 73 | :mnemonic ?h |
| 74 | :charset-list '(windows-1255) |
| 75 | :mime-charset 'windows-1255) |
| 76 | (define-coding-system-alias 'cp1255 'windows-1255) |
| 77 | |
| 78 | (define-coding-system 'cp862 |
| 79 | "DOS codepage 862 (Hebrew)" |
| 80 | :coding-type 'charset |
| 81 | :mnemonic ?D |
| 82 | :charset-list '(cp862) |
| 83 | :mime-charset 'cp862) |
| 84 | (define-coding-system-alias 'ibm862 'cp862) |
| 85 | |
| 86 | ;; Return a nested alist of Hebrew character sequences vs the |
| 87 | ;; corresponding glyph of FONT-OBJECT. |
| 88 | (defun hebrew-font-get-precomposed (font-object) |
| 89 | (let ((precomposed (font-get font-object 'hebrew-precomposed)) |
| 90 | ;; Vector of Hebrew precomposed characters. |
| 91 | (chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31 |
| 92 | #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A |
| 93 | #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46 |
| 94 | #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E]) |
| 95 | ;; Vector of decomposition character sequences corresponding |
| 96 | ;; to the above vector. |
| 97 | (decomposed |
| 98 | [[#x05E9 #x05C1] |
| 99 | [#x05E9 #x05C2] |
| 100 | [#x05E9 #x05BC #x05C1] |
| 101 | [#x05E9 #x05BC #x05C2] |
| 102 | [#x05D0 #x05B7] |
| 103 | [#x05D0 #x05B8] |
| 104 | [#x05D0 #x05BC] |
| 105 | [#x05D1 #x05BC] |
| 106 | [#x05D2 #x05BC] |
| 107 | [#x05D3 #x05BC] |
| 108 | [#x05D4 #x05BC] |
| 109 | [#x05D5 #x05BC] |
| 110 | [#x05D6 #x05BC] |
| 111 | [#x05D8 #x05BC] |
| 112 | [#x05D9 #x05BC] |
| 113 | [#x05DA #x05BC] |
| 114 | [#x05DB #x05BC] |
| 115 | [#x05DC #x05BC] |
| 116 | [#x05DE #x05BC] |
| 117 | [#x05E0 #x05BC] |
| 118 | [#x05E1 #x05BC] |
| 119 | [#x05E3 #x05BC] |
| 120 | [#x05E4 #x05BC] |
| 121 | [#x05E6 #x05BC] |
| 122 | [#x05E7 #x05BC] |
| 123 | [#x05E8 #x05BC] |
| 124 | [#x05E9 #x05BC] |
| 125 | [#x05EA #x05BC] |
| 126 | [#x05D5 #x05B9] |
| 127 | [#x05D1 #x05BF] |
| 128 | [#x05DB #x05BF] |
| 129 | [#x05E4 #x05BF]])) |
| 130 | (unless precomposed |
| 131 | (setq precomposed (list t)) |
| 132 | (let ((gvec (font-get-glyphs font-object 0 (length chars) chars))) |
| 133 | (dotimes (i (length chars)) |
| 134 | (if (aref gvec i) |
| 135 | (set-nested-alist (aref decomposed i) (aref gvec i) |
| 136 | precomposed)))) |
| 137 | ;; Cache the result in FONT-OBJECT's property. |
| 138 | (font-put font-object 'hebrew-precomposed precomposed)) |
| 139 | precomposed)) |
| 140 | |
| 141 | ;; Composition function for hebrew. GSTRING is made of a Hebrew base |
| 142 | ;; character followed by Hebrew diacritical marks, or is made of |
| 143 | ;; single Hebrew diacritical mark. Adjust GSTRING to display that |
| 144 | ;; sequence properly. The basic strategy is: |
| 145 | ;; |
| 146 | ;; (1) If there's single diacritical, add padding space to the left |
| 147 | ;; and right of the glyph. |
| 148 | ;; |
| 149 | ;; (2) If the font has OpenType features for Hebrew, ask the OTF |
| 150 | ;; driver the whole work. |
| 151 | ;; |
| 152 | ;; (3) If the font has precomposed glyphs, use them as far as |
| 153 | ;; possible. Adjust the remaining glyphs artificially. |
| 154 | |
| 155 | (defun hebrew-shape-gstring (gstring) |
| 156 | (let* ((font (lgstring-font gstring)) |
| 157 | (otf (font-get font :otf)) |
| 158 | (nchars (lgstring-char-len gstring)) |
| 159 | header nglyphs base-width glyph precomposed val idx) |
| 160 | (cond |
| 161 | ((= nchars 1) |
| 162 | ;; Independent diacritical mark. Add padding space to left or |
| 163 | ;; right so that the glyph doesn't overlap with the surrounding |
| 164 | ;; chars. |
| 165 | (setq glyph (lgstring-glyph gstring 0)) |
| 166 | (let ((width (lglyph-width glyph)) |
| 167 | bearing) |
| 168 | (if (< (setq bearing (lglyph-lbearing glyph)) 0) |
| 169 | (lglyph-set-adjustment glyph bearing 0 (- width bearing))) |
| 170 | (if (> (setq bearing (lglyph-rbearing glyph)) width) |
| 171 | (lglyph-set-adjustment glyph 0 0 bearing)))) |
| 172 | |
| 173 | ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf))) |
| 174 | ;; FONT has OpenType features for Hebrew. |
| 175 | (font-shape-gstring gstring)) |
| 176 | |
| 177 | (t |
| 178 | ;; FONT doesn't have OpenType features for Hebrew. |
| 179 | ;; Try a precomposed glyph. |
| 180 | ;; Now GSTRING is in this form: |
| 181 | ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...] |
| 182 | (setq precomposed (hebrew-font-get-precomposed font) |
| 183 | header (lgstring-header gstring) |
| 184 | val (lookup-nested-alist header precomposed nil 1)) |
| 185 | (if (and (consp val) (vectorp (car val))) |
| 186 | ;; All characters can be displayed by a single precomposed glyph. |
| 187 | ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...] |
| 188 | (let ((glyph (copy-sequence (car val)))) |
| 189 | (lglyph-set-from-to glyph 0 (1- nchars)) |
| 190 | (lgstring-set-glyph gstring 0 glyph) |
| 191 | (lgstring-set-glyph gstring 1 nil)) |
| 192 | (if (and (integerp val) (> val 2) |
| 193 | (setq glyph (lookup-nested-alist header precomposed val 1)) |
| 194 | (consp glyph) (vectorp (car glyph))) |
| 195 | ;; The first (1- VAL) characters can be displayed by a |
| 196 | ;; precomposed glyph. Provided that VAL is 3, the first |
| 197 | ;; two glyphs should be replaced by the precomposed glyph. |
| 198 | ;; In that case, reform GSTRING to: |
| 199 | ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...] |
| 200 | (let* ((ncmp (1- val)) ; number of composed glyphs |
| 201 | (diff (1- ncmp))) ; number of reduced glyphs |
| 202 | (setq glyph (copy-sequence (car glyph))) |
| 203 | (lglyph-set-from-to glyph 0 (1- nchars)) |
| 204 | (lgstring-set-glyph gstring 0 glyph) |
| 205 | (setq idx ncmp) |
| 206 | (while (< idx nchars) |
| 207 | (setq glyph (lgstring-glyph gstring idx)) |
| 208 | (lglyph-set-from-to glyph 0 (1- nchars)) |
| 209 | (lgstring-set-glyph gstring (- idx diff) glyph) |
| 210 | (setq idx (1+ idx))) |
| 211 | (lgstring-set-glyph gstring (- idx diff) nil) |
| 212 | (setq idx (- ncmp diff) |
| 213 | nglyphs (- nchars diff))) |
| 214 | (setq glyph (lgstring-glyph gstring 0)) |
| 215 | (lglyph-set-from-to glyph 0 (1- nchars)) |
| 216 | (setq idx 1 nglyphs nchars)) |
| 217 | ;; Now IDX is an index to the first non-precomposed glyph. |
| 218 | ;; Adjust positions of the remaining glyphs artificially. |
| 219 | (setq base-width (lglyph-width (lgstring-glyph gstring 0))) |
| 220 | (while (< idx nglyphs) |
| 221 | (setq glyph (lgstring-glyph gstring idx)) |
| 222 | (lglyph-set-from-to glyph 0 (1- nchars)) |
| 223 | (if (>= (lglyph-lbearing glyph) (lglyph-width glyph)) |
| 224 | ;; It seems that this glyph is designed to be rendered |
| 225 | ;; before the base glyph. |
| 226 | (lglyph-set-adjustment glyph (- base-width) 0 0) |
| 227 | (if (>= (lglyph-lbearing glyph) 0) |
| 228 | ;; Align the horizontal center of this glyph to the |
| 229 | ;; horizontal center of the base glyph. |
| 230 | (let ((width (- (lglyph-rbearing glyph) |
| 231 | (lglyph-lbearing glyph)))) |
| 232 | (lglyph-set-adjustment glyph |
| 233 | (- (/ (- base-width width) 2) |
| 234 | (lglyph-lbearing glyph) |
| 235 | base-width) 0 0)))) |
| 236 | (setq idx (1+ idx)))))) |
| 237 | gstring)) |
| 238 | |
| 239 | (let* ((base "[\u05D0-\u05F2]") |
| 240 | (combining "[\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7]+") |
| 241 | (pattern1 (concat base combining)) |
| 242 | (pattern2 (concat base "\u200D" combining))) |
| 243 | (set-char-table-range |
| 244 | composition-function-table '(#x591 . #x5C7) |
| 245 | (list (vector pattern2 3 'hebrew-shape-gstring) |
| 246 | (vector pattern2 2 'hebrew-shape-gstring) |
| 247 | (vector pattern1 1 'hebrew-shape-gstring) |
| 248 | [nil 0 hebrew-shape-gstring])) |
| 249 | ;; Exclude non-combining characters. |
| 250 | (set-char-table-range |
| 251 | composition-function-table #x5BE nil) |
| 252 | (set-char-table-range |
| 253 | composition-function-table #x5C0 nil) |
| 254 | (set-char-table-range |
| 255 | composition-function-table #x5C3 nil) |
| 256 | (set-char-table-range |
| 257 | composition-function-table #x5C6 nil)) |
| 258 | |
| 259 | (provide 'hebrew) |
| 260 | |
| 261 | ;;; hebrew.el ends here |