;;; characters.el --- set syntax and category for multibyte characters
-;; Copyright (C) 1997, 2000-2011 Free Software Foundation, Inc.
+;; Copyright (C) 1997, 2000-2012 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
;; National Institute of Advanced Industrial Science and Technology (AIST)
Base characters (Unicode General Category L,N,P,S,Zs)")
(define-category ?^ "Combining
Combining diacritic or mark (Unicode General Category M)")
+
+;; Bidi types: categories ?R and ?L mark strong right-to-left / left-to-right characters.
+(define-category ?R "Right-to-left (strong)
+Characters with \"strong\" right-to-left directionality, i.e.
+with R, AL, RLE, or RLO Unicode bidi character type.")
+
+(define-category ?L "Left-to-right (strong)
+Characters with \"strong\" left-to-right directionality, i.e.
+with L, LRE, or LRO Unicode bidi character type.")
+
\f
;;; Setting syntax and category.
("ྐ-ྐྵྺྻྼ" "w" ?0) ;
("ིེཻོཽྀ" "w" ?2) ; upper vowel
("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier
- ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier
+ ("྄ཱུ༙༵༷" "w" ?3) ; lower vowel/modifier
("" "w" ?3) ; invisible vowel a
("༠-༩༪-༳" "w" ?6) ; digit
("་།-༒༔ཿ" "." ?|) ; line-break char
;; Tai Viet
(let ((deflist '(;; chars syntax category
- ((?ꪀ. ?ꪯ) "w" ?0) ; cosonant
+ ((?ꪀ. ?ꪯ) "w" ?0) ; consonant
("ꪱꪵꪶ" "w" ?1) ; vowel base
((?ꪹ . ?ꪽ) "w" ?1) ; vowel base
("ꪰꪲꪳꪷꪸꪾ" "w" ?2) ; vowel upper
(modify-category-entry x category))
chars)))))
+;; Bidi categories: walk the bidi-class property table and put each entry into category ?R or ?L.
+
+(map-char-table (lambda (key val)                ; KEY: table entry being visited, VAL: its bidi-class value
+ (cond
+ ((memq val '(R AL RLO RLE))                     ; the types listed in the ?R category docstring
+ (modify-category-entry key ?R))
+ ((memq val '(L LRE LRO))                        ; the types listed in the ?L category docstring
+ (modify-category-entry key ?L))))               ; any other VAL matches no clause: no category is set
+ (unicode-property-table-internal 'bidi-class))  ; the table being walked
+
;; Latin
(modify-category-entry '(#x80 . #x024F) ?l)
;; relating Unicode categories to Emacs syntax codes.
;; NBSP isn't semantically interchangeable with other whitespace chars,
- ;; so it's more like punctation.
+ ;; so it's more like punctuation.
(set-case-syntax ? "." tbl)
(set-case-syntax ?¡ "." tbl)
(set-case-syntax ?¦ "_" tbl)
\f
;;; Setting unicode-category-table.
-;; This macro is to build unicode-category-table at compile time so
-;; that C code can access the table efficiently.
-(defmacro build-unicode-category-table ()
- (let ((table (make-char-table 'unicode-category-table nil)))
- (dotimes (i #x110000)
- (if (or (< i #xD800)
- (and (>= i #xF900) (< i #x30000))
- (and (>= i #xE0000) (< i #xE0200)))
- (aset table i (get-char-code-property i 'general-category))))
- (set-char-table-range table '(#xE000 . #xF8FF) 'Co)
- (set-char-table-range table '(#xF0000 . #xFFFFD) 'Co)
- (set-char-table-range table '(#x100000 . #x10FFFD) 'Co)
- (optimize-char-table table 'eq)
- table))
-
-(setq unicode-category-table (build-unicode-category-table))
+(setq unicode-category-table
+ (unicode-property-table-internal 'general-category)) ; NOTE(review): presumably a char-table of general-category symbols -- confirm
(map-char-table #'(lambda (key val)
(if (and val
(or (and (/= (aref (symbol-name val) 0) ?M)
`acronym': display an acronym of the character in a box. The
acronym is taken from `char-acronym-table', which see.
`hex-code': display the hexadecimal character code in a box."
-
+ :version "24.1"
:type '(alist :key-type (symbol :tag "Character Group")
:value-type (symbol :tag "Display Method"))
:options '((c0-control