Commit | Line | Data |
---|---|---|
6f2cdcd1 | 1 | ;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*- |
4ed46869 | 2 | |
114f9c96 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
d4877ac1 | 4 | ;; Free Software Foundation, Inc. |
7976eda0 | 5 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
114f9c96 | 6 | ;; 2005, 2006, 2007, 2008, 2009, 2010 |
eaa61218 KH |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
8 | ;; Registration Number H14PRO021 | |
1fdc503d | 9 | |
8f924df7 KH |
10 | ;; Copyright (C) 2003 |
11 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
12 | ;; Registration Number H13PRO009 | |
1fdc503d | 13 | |
4ed46869 KH |
14 | ;; Keywords: multilingual, Hebrew |
15 | ||
16 | ;; This file is part of GNU Emacs. | |
17 | ||
4936186e | 18 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 19 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
20 | ;; the Free Software Foundation, either version 3 of the License, or |
21 | ;; (at your option) any later version. | |
4ed46869 KH |
22 | |
23 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
24 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
25 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
26 | ;; GNU General Public License for more details. | |
27 | ||
28 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 29 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
30 | |
31 | ;;; Commentary: | |
32 | ||
eccac837 | 33 | ;; For Hebrew, the character set ISO8859-8 is supported. |
445559c9 | 34 | ;; See http://www.ecma.ch/ecma1/STAND/ECMA-121.HTM. |
eccac837 | 35 | ;; Windows-1255 is also supported. |
1fdc503d | 36 | |
4ed46869 KH |
37 | ;;; Code: |
38 | ||
e1915ab3 KH |
39 | (define-coding-system 'hebrew-iso-8bit |
40 | "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)." | |
41 | :coding-type 'charset | |
42 | :mnemonic ?8 | |
43 | :charset-list '(iso-8859-8) | |
d21363d3 | 44 | :mime-charset 'iso-8859-8) |
4ed46869 | 45 | |
71eabd24 | 46 | (define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit) |
4b9121fc | 47 | |
b81fa2f9 | 48 | ;; These are for Explicit and Implicit directionality information, as |
cd83d522 | 49 | ;; defined in RFC 1556. |
b81fa2f9 EZ |
50 | (define-coding-system-alias 'iso-8859-8-e 'hebrew-iso-8bit) |
51 | (define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit) | |
52 | ||
4ed46869 | 53 | (set-language-info-alist |
cd83d522 EZ |
54 | "Hebrew" '((tutorial . "TUTORIAL.he") |
55 | (charset iso-8859-8) | |
e4dfd4f7 | 56 | (coding-priority hebrew-iso-8bit) |
b523ee67 | 57 | (coding-system hebrew-iso-8bit windows-1255 cp862) |
e1915ab3 | 58 | (nonascii-translation . iso-8859-8) |
8f76845d KH |
59 | (input-method . "hebrew") |
60 | (unibyte-display . hebrew-iso-8bit) | |
6f2cdcd1 | 61 | (sample-text . "Hebrew שלום") |
ce6233c1 | 62 | (documentation . "Bidirectional editing is supported."))) |
4ed46869 | 63 | |
eccac837 DL |
64 | (set-language-info-alist |
65 | "Windows-1255" '((coding-priority windows-1255) | |
66 | (coding-system windows-1255) | |
5ef35063 WL |
67 | (documentation . "\ |
68 | Support for Windows-1255 encoding, e.g. for Yiddish. | |
ce6233c1 | 69 | Bidirectional editing is supported."))) |
eccac837 | 70 | |
5a41bd8b DL |
71 | (define-coding-system 'windows-1255 |
72 | "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)" | |
73 | :coding-type 'charset | |
74 | :mnemonic ?h | |
75 | :charset-list '(windows-1255) | |
76 | :mime-charset 'windows-1255) | |
77 | (define-coding-system-alias 'cp1255 'windows-1255) | |
78 | ||
bac4bf38 | 79 | (define-coding-system 'cp862 |
b523ee67 | 80 | "DOS codepage 862 (Hebrew)" |
bac4bf38 DL |
81 | :coding-type 'charset |
82 | :mnemonic ?D | |
83 | :charset-list '(cp862) | |
84 | :mime-charset 'cp862) | |
85 | (define-coding-system-alias 'ibm862 'cp862) | |
86 | ||
6f2cdcd1 KH |
87 | ;; Return a nested alist of Hebrew character sequences vs the |
88 | ;; corresponding glyph of FONT-OBJECT. | |
89 | (defun hebrew-font-get-precomposed (font-object) | |
90 | (let ((precomposed (font-get font-object 'hebrew-precomposed)) | |
3b59c351 | 91 | ;; Vector of Hebrew precomposed characters. |
6f2cdcd1 KH |
92 | (chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31 |
93 | #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A | |
94 | #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46 | |
95 | #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E]) | |
96 | ;; Vector of decomposition character sequences corresponding | |
97 | ;; to the above vector. | |
3b59c351 | 98 | (decomposed |
6f2cdcd1 KH |
99 | [[#x05E9 #x05C1] |
100 | [#x05E9 #x05C2] | |
101 | [#x05E9 #x05BC #x05C1] | |
102 | [#x05E9 #x05BC #x05C2] | |
103 | [#x05D0 #x05B7] | |
104 | [#x05D0 #x05B8] | |
105 | [#x05D0 #x05BC] | |
106 | [#x05D1 #x05BC] | |
107 | [#x05D2 #x05BC] | |
108 | [#x05D3 #x05BC] | |
109 | [#x05D4 #x05BC] | |
110 | [#x05D5 #x05BC] | |
111 | [#x05D6 #x05BC] | |
112 | [#x05D8 #x05BC] | |
113 | [#x05D9 #x05BC] | |
114 | [#x05DA #x05BC] | |
115 | [#x05DB #x05BC] | |
116 | [#x05DC #x05BC] | |
117 | [#x05DE #x05BC] | |
118 | [#x05E0 #x05BC] | |
119 | [#x05E1 #x05BC] | |
120 | [#x05E3 #x05BC] | |
121 | [#x05E4 #x05BC] | |
122 | [#x05E6 #x05BC] | |
123 | [#x05E7 #x05BC] | |
124 | [#x05E8 #x05BC] | |
125 | [#x05E9 #x05BC] | |
126 | [#x05EA #x05BC] | |
127 | [#x05D5 #x05B9] | |
128 | [#x05D1 #x05BF] | |
129 | [#x05DB #x05BF] | |
130 | [#x05E4 #x05BF]])) | |
131 | (unless precomposed | |
132 | (setq precomposed (list t)) | |
133 | (let ((gvec (font-get-glyphs font-object 0 (length chars) chars))) | |
134 | (dotimes (i (length chars)) | |
135 | (if (aref gvec i) | |
136 | (set-nested-alist (aref decomposed i) (aref gvec i) | |
137 | precomposed)))) | |
138 | ;; Cache the result in FONT-OBJECT's property. | |
139 | (font-put font-object 'hebrew-precomposed precomposed)) | |
140 | precomposed)) | |
141 | ||
142 | ;; Composition function for hebrew. GSTRING is made of a Hebrew base | |
143 | ;; character followed by Hebrew diacritical marks, or is made of | |
144 | ;; single Hebrew diacritical mark. Adjust GSTRING to display that | |
145 | ;; sequence properly. The basic strategy is: | |
146 | ;; | |
147 | ;; (1) If there's single diacritical, add padding space to the left | |
148 | ;; and right of the glyph. | |
149 | ;; | |
150 | ;; (2) If the font has OpenType features for Hebrew, ask the OTF | |
151 | ;; driver the whole work. | |
152 | ;; | |
153 | ;; (3) If the font has precomposed glyphs, use them as far as | |
154 | ;; possible. Adjust the remaining glyphs artificially. | |
155 | ||
f668ef02 | 156 | (defun hebrew-shape-gstring (gstring) |
6f2cdcd1 KH |
157 | (let* ((font (lgstring-font gstring)) |
158 | (otf (font-get font :otf)) | |
159 | (nchars (lgstring-char-len gstring)) | |
160 | header nglyphs base-width glyph precomposed val idx) | |
161 | (cond | |
162 | ((= nchars 1) | |
163 | ;; Independent diacritical mark. Add padding space to left or | |
164 | ;; right so that the glyph doesn't overlap with the surrounding | |
165 | ;; chars. | |
166 | (setq glyph (lgstring-glyph gstring 0)) | |
167 | (let ((width (lglyph-width glyph)) | |
168 | bearing) | |
169 | (if (< (setq bearing (lglyph-lbearing glyph)) 0) | |
170 | (lglyph-set-adjustment glyph bearing 0 (- width bearing))) | |
171 | (if (> (setq bearing (lglyph-rbearing glyph)) width) | |
172 | (lglyph-set-adjustment glyph 0 0 bearing)))) | |
173 | ||
174 | ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf))) | |
175 | ;; FONT has OpenType features for Hebrew. | |
176 | (font-shape-gstring gstring)) | |
177 | ||
178 | (t | |
179 | ;; FONT doesn't have OpenType features for Hebrew. | |
180 | ;; Try a precomposed glyph. | |
181 | ;; Now GSTRING is in this form: | |
182 | ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...] | |
183 | (setq precomposed (hebrew-font-get-precomposed font) | |
184 | header (lgstring-header gstring) | |
185 | val (lookup-nested-alist header precomposed nil 1)) | |
186 | (if (and (consp val) (vectorp (car val))) | |
187 | ;; All characters can be displayed by a single precomposed glyph. | |
188 | ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...] | |
189 | (let ((glyph (copy-sequence (car val)))) | |
190 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
191 | (lgstring-set-glyph gstring 0 glyph) | |
192 | (lgstring-set-glyph gstring 1 nil)) | |
193 | (if (and (integerp val) (> val 2) | |
194 | (setq glyph (lookup-nested-alist header precomposed val 1)) | |
195 | (consp glyph) (vectorp (car glyph))) | |
196 | ;; The first (1- VAL) characters can be displayed by a | |
197 | ;; precomposed glyph. Provided that VAL is 3, the first | |
198 | ;; two glyphs should be replaced by the precomposed glyph. | |
199 | ;; In that case, reform GSTRING to: | |
200 | ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...] | |
201 | (let* ((ncmp (1- val)) ; number of composed glyphs | |
202 | (diff (1- ncmp))) ; number of reduced glyphs | |
203 | (setq glyph (copy-sequence (car glyph))) | |
204 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
205 | (lgstring-set-glyph gstring 0 glyph) | |
206 | (setq idx ncmp) | |
207 | (while (< idx nchars) | |
208 | (setq glyph (lgstring-glyph gstring idx)) | |
209 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
210 | (lgstring-set-glyph gstring (- idx diff) glyph) | |
211 | (setq idx (1+ idx))) | |
212 | (lgstring-set-glyph gstring (- idx diff) nil) | |
213 | (setq idx (- ncmp diff) | |
214 | nglyphs (- nchars diff))) | |
215 | (setq glyph (lgstring-glyph gstring 0)) | |
216 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
217 | (setq idx 1 nglyphs nchars)) | |
218 | ;; Now IDX is an index to the first non-precomposed glyph. | |
219 | ;; Adjust positions of the remaining glyphs artificially. | |
220 | (setq base-width (lglyph-width (lgstring-glyph gstring 0))) | |
221 | (while (< idx nglyphs) | |
222 | (setq glyph (lgstring-glyph gstring idx)) | |
223 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
224 | (if (>= (lglyph-lbearing glyph) (lglyph-width glyph)) | |
225 | ;; It seems that this glyph is designed to be rendered | |
226 | ;; before the base glyph. | |
227 | (lglyph-set-adjustment glyph (- base-width) 0 0) | |
228 | (if (>= (lglyph-lbearing glyph) 0) | |
229 | ;; Align the horizontal center of this glyph to the | |
230 | ;; horizontal center of the base glyph. | |
231 | (let ((width (- (lglyph-rbearing glyph) | |
232 | (lglyph-lbearing glyph)))) | |
233 | (lglyph-set-adjustment glyph | |
234 | (- (/ (- base-width width) 2) | |
235 | (lglyph-lbearing glyph) | |
236 | base-width) 0 0)))) | |
237 | (setq idx (1+ idx)))))) | |
238 | gstring)) | |
f668ef02 | 239 | |
2948599b KH |
240 | (let* ((base "[\u05D0-\u05F2]") |
241 | (combining "[\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7]+") | |
242 | (pattern1 (concat base combining)) | |
243 | (pattern2 (concat base "\u200D" combining))) | |
f668ef02 KH |
244 | (set-char-table-range |
245 | composition-function-table '(#x591 . #x5C7) | |
6f2cdcd1 KH |
246 | (list (vector pattern2 3 'hebrew-shape-gstring) |
247 | (vector pattern2 2 'hebrew-shape-gstring) | |
f668ef02 | 248 | (vector pattern1 1 'hebrew-shape-gstring) |
6f2cdcd1 | 249 | [nil 0 hebrew-shape-gstring])) |
2948599b KH |
250 | ;; Exclude non-combining characters. |
251 | (set-char-table-range | |
252 | composition-function-table #x5BE nil) | |
f668ef02 KH |
253 | (set-char-table-range |
254 | composition-function-table #x5C0 nil) | |
2948599b KH |
255 | (set-char-table-range |
256 | composition-function-table #x5C3 nil) | |
f668ef02 KH |
257 | (set-char-table-range |
258 | composition-function-table #x5C6 nil)) | |
259 | ||
41da80b1 DL |
260 | (provide 'hebrew) |
261 | ||
cbee283d | 262 | ;; arch-tag: 3ca04f32-3f1e-498e-af46-8267498ba5d9 |
4b9121fc | 263 | ;;; hebrew.el ends here |