* calendar/todo-mode.el: Fix two bugs.
[bpt/emacs.git] / lisp / language / hebrew.el
CommitLineData
6f2cdcd1 1;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*-
4ed46869 2
ba318903 3;; Copyright (C) 2001-2014 Free Software Foundation, Inc.
7976eda0 4;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 5;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
eaa61218
KH
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H14PRO021
1fdc503d 8
8f924df7
KH
9;; Copyright (C) 2003
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
1fdc503d 12
4ed46869
KH
13;; Keywords: multilingual, Hebrew
14
15;; This file is part of GNU Emacs.
16
4936186e 17;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 18;; it under the terms of the GNU General Public License as published by
4936186e
GM
19;; the Free Software Foundation, either version 3 of the License, or
20;; (at your option) any later version.
4ed46869
KH
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
4936186e 28;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869
KH
29
30;;; Commentary:
31
eccac837 32;; For Hebrew, the character set ISO8859-8 is supported.
445559c9 33;; See http://www.ecma.ch/ecma1/STAND/ECMA-121.HTM.
eccac837 34;; Windows-1255 is also supported.
1fdc503d 35
4ed46869
KH
36;;; Code:
37
e1915ab3
KH
38(define-coding-system 'hebrew-iso-8bit
39 "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)."
40 :coding-type 'charset
41 :mnemonic ?8
42 :charset-list '(iso-8859-8)
d21363d3 43 :mime-charset 'iso-8859-8)
4ed46869 44
71eabd24 45(define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit)
4b9121fc 46
b81fa2f9 47;; These are for Explicit and Implicit directionality information, as
cd83d522 48;; defined in RFC 1556.
b81fa2f9
EZ
49(define-coding-system-alias 'iso-8859-8-e 'hebrew-iso-8bit)
50(define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit)
51
4ed46869 52(set-language-info-alist
cd83d522
EZ
53 "Hebrew" '((tutorial . "TUTORIAL.he")
54 (charset iso-8859-8)
e4dfd4f7 55 (coding-priority hebrew-iso-8bit)
b523ee67 56 (coding-system hebrew-iso-8bit windows-1255 cp862)
e1915ab3 57 (nonascii-translation . iso-8859-8)
8f76845d
KH
58 (input-method . "hebrew")
59 (unibyte-display . hebrew-iso-8bit)
6f2cdcd1 60 (sample-text . "Hebrew שלום")
ce6233c1 61 (documentation . "Bidirectional editing is supported.")))
4ed46869 62
eccac837
DL
63(set-language-info-alist
64 "Windows-1255" '((coding-priority windows-1255)
65 (coding-system windows-1255)
5ef35063
WL
66 (documentation . "\
67Support for Windows-1255 encoding, e.g. for Yiddish.
ce6233c1 68Bidirectional editing is supported.")))
eccac837 69
5a41bd8b
DL
70(define-coding-system 'windows-1255
71 "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)"
72 :coding-type 'charset
73 :mnemonic ?h
74 :charset-list '(windows-1255)
75 :mime-charset 'windows-1255)
76(define-coding-system-alias 'cp1255 'windows-1255)
77
bac4bf38 78(define-coding-system 'cp862
b523ee67 79 "DOS codepage 862 (Hebrew)"
bac4bf38
DL
80 :coding-type 'charset
81 :mnemonic ?D
82 :charset-list '(cp862)
83 :mime-charset 'cp862)
84(define-coding-system-alias 'ibm862 'cp862)
85
6f2cdcd1
KH
86;; Return a nested alist of Hebrew character sequences vs the
87;; corresponding glyph of FONT-OBJECT.
88(defun hebrew-font-get-precomposed (font-object)
89 (let ((precomposed (font-get font-object 'hebrew-precomposed))
3b59c351 90 ;; Vector of Hebrew precomposed characters.
6f2cdcd1
KH
91 (chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31
92 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A
93 #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46
94 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E])
95 ;; Vector of decomposition character sequences corresponding
96 ;; to the above vector.
3b59c351 97 (decomposed
6f2cdcd1
KH
98 [[#x05E9 #x05C1]
99 [#x05E9 #x05C2]
100 [#x05E9 #x05BC #x05C1]
101 [#x05E9 #x05BC #x05C2]
102 [#x05D0 #x05B7]
103 [#x05D0 #x05B8]
104 [#x05D0 #x05BC]
105 [#x05D1 #x05BC]
106 [#x05D2 #x05BC]
107 [#x05D3 #x05BC]
108 [#x05D4 #x05BC]
109 [#x05D5 #x05BC]
110 [#x05D6 #x05BC]
111 [#x05D8 #x05BC]
112 [#x05D9 #x05BC]
113 [#x05DA #x05BC]
114 [#x05DB #x05BC]
115 [#x05DC #x05BC]
116 [#x05DE #x05BC]
117 [#x05E0 #x05BC]
118 [#x05E1 #x05BC]
119 [#x05E3 #x05BC]
120 [#x05E4 #x05BC]
121 [#x05E6 #x05BC]
122 [#x05E7 #x05BC]
123 [#x05E8 #x05BC]
124 [#x05E9 #x05BC]
125 [#x05EA #x05BC]
126 [#x05D5 #x05B9]
127 [#x05D1 #x05BF]
128 [#x05DB #x05BF]
129 [#x05E4 #x05BF]]))
130 (unless precomposed
131 (setq precomposed (list t))
132 (let ((gvec (font-get-glyphs font-object 0 (length chars) chars)))
133 (dotimes (i (length chars))
134 (if (aref gvec i)
135 (set-nested-alist (aref decomposed i) (aref gvec i)
136 precomposed))))
137 ;; Cache the result in FONT-OBJECT's property.
138 (font-put font-object 'hebrew-precomposed precomposed))
139 precomposed))
140
141;; Composition function for hebrew. GSTRING is made of a Hebrew base
142;; character followed by Hebrew diacritical marks, or is made of
143;; single Hebrew diacritical mark. Adjust GSTRING to display that
144;; sequence properly. The basic strategy is:
145;;
146;; (1) If there's single diacritical, add padding space to the left
147;; and right of the glyph.
148;;
149;; (2) If the font has OpenType features for Hebrew, ask the OTF
150;; driver the whole work.
151;;
152;; (3) If the font has precomposed glyphs, use them as far as
153;; possible. Adjust the remaining glyphs artificially.
154
f668ef02 155(defun hebrew-shape-gstring (gstring)
6f2cdcd1
KH
156 (let* ((font (lgstring-font gstring))
157 (otf (font-get font :otf))
158 (nchars (lgstring-char-len gstring))
159 header nglyphs base-width glyph precomposed val idx)
160 (cond
161 ((= nchars 1)
162 ;; Independent diacritical mark. Add padding space to left or
163 ;; right so that the glyph doesn't overlap with the surrounding
164 ;; chars.
165 (setq glyph (lgstring-glyph gstring 0))
166 (let ((width (lglyph-width glyph))
167 bearing)
168 (if (< (setq bearing (lglyph-lbearing glyph)) 0)
169 (lglyph-set-adjustment glyph bearing 0 (- width bearing)))
170 (if (> (setq bearing (lglyph-rbearing glyph)) width)
171 (lglyph-set-adjustment glyph 0 0 bearing))))
172
173 ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf)))
174 ;; FONT has OpenType features for Hebrew.
175 (font-shape-gstring gstring))
176
177 (t
178 ;; FONT doesn't have OpenType features for Hebrew.
179 ;; Try a precomposed glyph.
180 ;; Now GSTRING is in this form:
181 ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...]
182 (setq precomposed (hebrew-font-get-precomposed font)
183 header (lgstring-header gstring)
184 val (lookup-nested-alist header precomposed nil 1))
185 (if (and (consp val) (vectorp (car val)))
186 ;; All characters can be displayed by a single precomposed glyph.
187 ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...]
188 (let ((glyph (copy-sequence (car val))))
189 (lglyph-set-from-to glyph 0 (1- nchars))
190 (lgstring-set-glyph gstring 0 glyph)
191 (lgstring-set-glyph gstring 1 nil))
192 (if (and (integerp val) (> val 2)
193 (setq glyph (lookup-nested-alist header precomposed val 1))
194 (consp glyph) (vectorp (car glyph)))
195 ;; The first (1- VAL) characters can be displayed by a
196 ;; precomposed glyph. Provided that VAL is 3, the first
197 ;; two glyphs should be replaced by the precomposed glyph.
198 ;; In that case, reform GSTRING to:
199 ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...]
200 (let* ((ncmp (1- val)) ; number of composed glyphs
201 (diff (1- ncmp))) ; number of reduced glyphs
202 (setq glyph (copy-sequence (car glyph)))
203 (lglyph-set-from-to glyph 0 (1- nchars))
204 (lgstring-set-glyph gstring 0 glyph)
205 (setq idx ncmp)
206 (while (< idx nchars)
207 (setq glyph (lgstring-glyph gstring idx))
208 (lglyph-set-from-to glyph 0 (1- nchars))
209 (lgstring-set-glyph gstring (- idx diff) glyph)
210 (setq idx (1+ idx)))
211 (lgstring-set-glyph gstring (- idx diff) nil)
212 (setq idx (- ncmp diff)
213 nglyphs (- nchars diff)))
214 (setq glyph (lgstring-glyph gstring 0))
215 (lglyph-set-from-to glyph 0 (1- nchars))
216 (setq idx 1 nglyphs nchars))
217 ;; Now IDX is an index to the first non-precomposed glyph.
218 ;; Adjust positions of the remaining glyphs artificially.
219 (setq base-width (lglyph-width (lgstring-glyph gstring 0)))
220 (while (< idx nglyphs)
221 (setq glyph (lgstring-glyph gstring idx))
222 (lglyph-set-from-to glyph 0 (1- nchars))
223 (if (>= (lglyph-lbearing glyph) (lglyph-width glyph))
224 ;; It seems that this glyph is designed to be rendered
225 ;; before the base glyph.
226 (lglyph-set-adjustment glyph (- base-width) 0 0)
227 (if (>= (lglyph-lbearing glyph) 0)
228 ;; Align the horizontal center of this glyph to the
229 ;; horizontal center of the base glyph.
230 (let ((width (- (lglyph-rbearing glyph)
231 (lglyph-lbearing glyph))))
232 (lglyph-set-adjustment glyph
233 (- (/ (- base-width width) 2)
234 (lglyph-lbearing glyph)
235 base-width) 0 0))))
236 (setq idx (1+ idx))))))
237 gstring))
f668ef02 238
2948599b
KH
239(let* ((base "[\u05D0-\u05F2]")
240 (combining "[\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7]+")
241 (pattern1 (concat base combining))
242 (pattern2 (concat base "\u200D" combining)))
f668ef02
KH
243 (set-char-table-range
244 composition-function-table '(#x591 . #x5C7)
6f2cdcd1
KH
245 (list (vector pattern2 3 'hebrew-shape-gstring)
246 (vector pattern2 2 'hebrew-shape-gstring)
f668ef02 247 (vector pattern1 1 'hebrew-shape-gstring)
6f2cdcd1 248 [nil 0 hebrew-shape-gstring]))
2948599b
KH
249 ;; Exclude non-combining characters.
250 (set-char-table-range
251 composition-function-table #x5BE nil)
f668ef02
KH
252 (set-char-table-range
253 composition-function-table #x5C0 nil)
2948599b
KH
254 (set-char-table-range
255 composition-function-table #x5C3 nil)
f668ef02
KH
256 (set-char-table-range
257 composition-function-table #x5C6 nil))
258
41da80b1
DL
259(provide 'hebrew)
260
4b9121fc 261;;; hebrew.el ends here