Commit | Line | Data |
---|---|---|
6f2cdcd1 | 1 | ;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*- |
4ed46869 | 2 | |
acaf905b | 3 | ;; Copyright (C) 2001-2012 Free Software Foundation, Inc. |
7976eda0 | 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
eaa61218 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
1fdc503d | 8 | |
8f924df7 KH |
9 | ;; Copyright (C) 2003 |
10 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
11 | ;; Registration Number H13PRO009 | |
1fdc503d | 12 | |
4ed46869 KH |
13 | ;; Keywords: multilingual, Hebrew |
14 | ||
15 | ;; This file is part of GNU Emacs. | |
16 | ||
4936186e | 17 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 18 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
19 | ;; the Free Software Foundation, either version 3 of the License, or |
20 | ;; (at your option) any later version. | |
4ed46869 KH |
21 | |
22 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
23 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | ;; GNU General Public License for more details. | |
26 | ||
27 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 28 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
29 | |
30 | ;;; Commentary: | |
31 | ||
eccac837 | 32 | ;; For Hebrew, the character set ISO8859-8 is supported. |
445559c9 | 33 | ;; See http://www.ecma.ch/ecma1/STAND/ECMA-121.HTM. |
eccac837 | 34 | ;; Windows-1255 is also supported. |
1fdc503d | 35 | |
4ed46869 KH |
36 | ;;; Code: |
37 | ||
e1915ab3 KH |
38 | (define-coding-system 'hebrew-iso-8bit |
39 | "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)." | |
40 | :coding-type 'charset | |
41 | :mnemonic ?8 | |
42 | :charset-list '(iso-8859-8) | |
d21363d3 | 43 | :mime-charset 'iso-8859-8) |
4ed46869 | 44 | |
71eabd24 | 45 | (define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit) |
4b9121fc | 46 | |
b81fa2f9 | 47 | ;; These are for Explicit and Implicit directionality information, as |
cd83d522 | 48 | ;; defined in RFC 1556. |
b81fa2f9 EZ |
49 | (define-coding-system-alias 'iso-8859-8-e 'hebrew-iso-8bit) |
50 | (define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit) | |
51 | ||
4ed46869 | 52 | (set-language-info-alist |
cd83d522 EZ |
53 | "Hebrew" '((tutorial . "TUTORIAL.he") |
54 | (charset iso-8859-8) | |
e4dfd4f7 | 55 | (coding-priority hebrew-iso-8bit) |
b523ee67 | 56 | (coding-system hebrew-iso-8bit windows-1255 cp862) |
e1915ab3 | 57 | (nonascii-translation . iso-8859-8) |
8f76845d KH |
58 | (input-method . "hebrew") |
59 | (unibyte-display . hebrew-iso-8bit) | |
6f2cdcd1 | 60 | (sample-text . "Hebrew שלום") |
ce6233c1 | 61 | (documentation . "Bidirectional editing is supported."))) |
4ed46869 | 62 | |
eccac837 DL |
63 | (set-language-info-alist |
64 | "Windows-1255" '((coding-priority windows-1255) | |
65 | (coding-system windows-1255) | |
5ef35063 WL |
66 | (documentation . "\ |
67 | Support for Windows-1255 encoding, e.g. for Yiddish. | |
ce6233c1 | 68 | Bidirectional editing is supported."))) |
eccac837 | 69 | |
5a41bd8b DL |
70 | (define-coding-system 'windows-1255 |
71 | "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)" | |
72 | :coding-type 'charset | |
73 | :mnemonic ?h | |
74 | :charset-list '(windows-1255) | |
75 | :mime-charset 'windows-1255) | |
76 | (define-coding-system-alias 'cp1255 'windows-1255) | |
77 | ||
bac4bf38 | 78 | (define-coding-system 'cp862 |
b523ee67 | 79 | "DOS codepage 862 (Hebrew)" |
bac4bf38 DL |
80 | :coding-type 'charset |
81 | :mnemonic ?D | |
82 | :charset-list '(cp862) | |
83 | :mime-charset 'cp862) | |
84 | (define-coding-system-alias 'ibm862 'cp862) | |
85 | ||
6f2cdcd1 KH |
86 | ;; Return a nested alist of Hebrew character sequences vs the |
87 | ;; corresponding glyph of FONT-OBJECT. | |
88 | (defun hebrew-font-get-precomposed (font-object) | |
89 | (let ((precomposed (font-get font-object 'hebrew-precomposed)) | |
3b59c351 | 90 | ;; Vector of Hebrew precomposed characters. |
6f2cdcd1 KH |
91 | (chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31 |
92 | #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A | |
93 | #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46 | |
94 | #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E]) | |
95 | ;; Vector of decomposition character sequences corresponding | |
96 | ;; to the above vector. | |
3b59c351 | 97 | (decomposed |
6f2cdcd1 KH |
98 | [[#x05E9 #x05C1] |
99 | [#x05E9 #x05C2] | |
100 | [#x05E9 #x05BC #x05C1] | |
101 | [#x05E9 #x05BC #x05C2] | |
102 | [#x05D0 #x05B7] | |
103 | [#x05D0 #x05B8] | |
104 | [#x05D0 #x05BC] | |
105 | [#x05D1 #x05BC] | |
106 | [#x05D2 #x05BC] | |
107 | [#x05D3 #x05BC] | |
108 | [#x05D4 #x05BC] | |
109 | [#x05D5 #x05BC] | |
110 | [#x05D6 #x05BC] | |
111 | [#x05D8 #x05BC] | |
112 | [#x05D9 #x05BC] | |
113 | [#x05DA #x05BC] | |
114 | [#x05DB #x05BC] | |
115 | [#x05DC #x05BC] | |
116 | [#x05DE #x05BC] | |
117 | [#x05E0 #x05BC] | |
118 | [#x05E1 #x05BC] | |
119 | [#x05E3 #x05BC] | |
120 | [#x05E4 #x05BC] | |
121 | [#x05E6 #x05BC] | |
122 | [#x05E7 #x05BC] | |
123 | [#x05E8 #x05BC] | |
124 | [#x05E9 #x05BC] | |
125 | [#x05EA #x05BC] | |
126 | [#x05D5 #x05B9] | |
127 | [#x05D1 #x05BF] | |
128 | [#x05DB #x05BF] | |
129 | [#x05E4 #x05BF]])) | |
130 | (unless precomposed | |
131 | (setq precomposed (list t)) | |
132 | (let ((gvec (font-get-glyphs font-object 0 (length chars) chars))) | |
133 | (dotimes (i (length chars)) | |
134 | (if (aref gvec i) | |
135 | (set-nested-alist (aref decomposed i) (aref gvec i) | |
136 | precomposed)))) | |
137 | ;; Cache the result in FONT-OBJECT's property. | |
138 | (font-put font-object 'hebrew-precomposed precomposed)) | |
139 | precomposed)) | |
140 | ||
141 | ;; Composition function for hebrew. GSTRING is made of a Hebrew base | |
142 | ;; character followed by Hebrew diacritical marks, or is made of | |
143 | ;; single Hebrew diacritical mark. Adjust GSTRING to display that | |
144 | ;; sequence properly. The basic strategy is: | |
145 | ;; | |
146 | ;; (1) If there's single diacritical, add padding space to the left | |
147 | ;; and right of the glyph. | |
148 | ;; | |
149 | ;; (2) If the font has OpenType features for Hebrew, ask the OTF | |
150 | ;; driver the whole work. | |
151 | ;; | |
152 | ;; (3) If the font has precomposed glyphs, use them as far as | |
153 | ;; possible. Adjust the remaining glyphs artificially. | |
154 | ||
f668ef02 | 155 | (defun hebrew-shape-gstring (gstring) |
6f2cdcd1 KH |
156 | (let* ((font (lgstring-font gstring)) |
157 | (otf (font-get font :otf)) | |
158 | (nchars (lgstring-char-len gstring)) | |
159 | header nglyphs base-width glyph precomposed val idx) | |
160 | (cond | |
161 | ((= nchars 1) | |
162 | ;; Independent diacritical mark. Add padding space to left or | |
163 | ;; right so that the glyph doesn't overlap with the surrounding | |
164 | ;; chars. | |
165 | (setq glyph (lgstring-glyph gstring 0)) | |
166 | (let ((width (lglyph-width glyph)) | |
167 | bearing) | |
168 | (if (< (setq bearing (lglyph-lbearing glyph)) 0) | |
169 | (lglyph-set-adjustment glyph bearing 0 (- width bearing))) | |
170 | (if (> (setq bearing (lglyph-rbearing glyph)) width) | |
171 | (lglyph-set-adjustment glyph 0 0 bearing)))) | |
172 | ||
173 | ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf))) | |
174 | ;; FONT has OpenType features for Hebrew. | |
175 | (font-shape-gstring gstring)) | |
176 | ||
177 | (t | |
178 | ;; FONT doesn't have OpenType features for Hebrew. | |
179 | ;; Try a precomposed glyph. | |
180 | ;; Now GSTRING is in this form: | |
181 | ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...] | |
182 | (setq precomposed (hebrew-font-get-precomposed font) | |
183 | header (lgstring-header gstring) | |
184 | val (lookup-nested-alist header precomposed nil 1)) | |
185 | (if (and (consp val) (vectorp (car val))) | |
186 | ;; All characters can be displayed by a single precomposed glyph. | |
187 | ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...] | |
188 | (let ((glyph (copy-sequence (car val)))) | |
189 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
190 | (lgstring-set-glyph gstring 0 glyph) | |
191 | (lgstring-set-glyph gstring 1 nil)) | |
192 | (if (and (integerp val) (> val 2) | |
193 | (setq glyph (lookup-nested-alist header precomposed val 1)) | |
194 | (consp glyph) (vectorp (car glyph))) | |
195 | ;; The first (1- VAL) characters can be displayed by a | |
196 | ;; precomposed glyph. Provided that VAL is 3, the first | |
197 | ;; two glyphs should be replaced by the precomposed glyph. | |
198 | ;; In that case, reform GSTRING to: | |
199 | ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...] | |
200 | (let* ((ncmp (1- val)) ; number of composed glyphs | |
201 | (diff (1- ncmp))) ; number of reduced glyphs | |
202 | (setq glyph (copy-sequence (car glyph))) | |
203 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
204 | (lgstring-set-glyph gstring 0 glyph) | |
205 | (setq idx ncmp) | |
206 | (while (< idx nchars) | |
207 | (setq glyph (lgstring-glyph gstring idx)) | |
208 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
209 | (lgstring-set-glyph gstring (- idx diff) glyph) | |
210 | (setq idx (1+ idx))) | |
211 | (lgstring-set-glyph gstring (- idx diff) nil) | |
212 | (setq idx (- ncmp diff) | |
213 | nglyphs (- nchars diff))) | |
214 | (setq glyph (lgstring-glyph gstring 0)) | |
215 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
216 | (setq idx 1 nglyphs nchars)) | |
217 | ;; Now IDX is an index to the first non-precomposed glyph. | |
218 | ;; Adjust positions of the remaining glyphs artificially. | |
219 | (setq base-width (lglyph-width (lgstring-glyph gstring 0))) | |
220 | (while (< idx nglyphs) | |
221 | (setq glyph (lgstring-glyph gstring idx)) | |
222 | (lglyph-set-from-to glyph 0 (1- nchars)) | |
223 | (if (>= (lglyph-lbearing glyph) (lglyph-width glyph)) | |
224 | ;; It seems that this glyph is designed to be rendered | |
225 | ;; before the base glyph. | |
226 | (lglyph-set-adjustment glyph (- base-width) 0 0) | |
227 | (if (>= (lglyph-lbearing glyph) 0) | |
228 | ;; Align the horizontal center of this glyph to the | |
229 | ;; horizontal center of the base glyph. | |
230 | (let ((width (- (lglyph-rbearing glyph) | |
231 | (lglyph-lbearing glyph)))) | |
232 | (lglyph-set-adjustment glyph | |
233 | (- (/ (- base-width width) 2) | |
234 | (lglyph-lbearing glyph) | |
235 | base-width) 0 0)))) | |
236 | (setq idx (1+ idx)))))) | |
237 | gstring)) | |
f668ef02 | 238 | |
2948599b KH |
239 | (let* ((base "[\u05D0-\u05F2]") |
240 | (combining "[\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7]+") | |
241 | (pattern1 (concat base combining)) | |
242 | (pattern2 (concat base "\u200D" combining))) | |
f668ef02 KH |
243 | (set-char-table-range |
244 | composition-function-table '(#x591 . #x5C7) | |
6f2cdcd1 KH |
245 | (list (vector pattern2 3 'hebrew-shape-gstring) |
246 | (vector pattern2 2 'hebrew-shape-gstring) | |
f668ef02 | 247 | (vector pattern1 1 'hebrew-shape-gstring) |
6f2cdcd1 | 248 | [nil 0 hebrew-shape-gstring])) |
2948599b KH |
249 | ;; Exclude non-combining characters. |
250 | (set-char-table-range | |
251 | composition-function-table #x5BE nil) | |
f668ef02 KH |
252 | (set-char-table-range |
253 | composition-function-table #x5C0 nil) | |
2948599b KH |
254 | (set-char-table-range |
255 | composition-function-table #x5C3 nil) | |
f668ef02 KH |
256 | (set-char-table-range |
257 | composition-function-table #x5C6 nil)) | |
258 | ||
41da80b1 DL |
259 | (provide 'hebrew) |
260 | ||
4b9121fc | 261 | ;;; hebrew.el ends here |