X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/c2a2e7b0b048b74a9b0f362747120245e68ed0e5..dc5d130eed60dbb3e55fb3cd31b3d4bd2333ab85:/admin/unidata/unidata-gen.el diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 42e1cc0bc3..8b24b11622 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -1,6 +1,6 @@ ;; unidata-gen.el -- Create files containing character property data. -;; Copyright 2008-2013 (C) Free Software Foundation, Inc. +;; Copyright (C) 2008-2014 Free Software Foundation, Inc. ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 ;; National Institute of Advanced Industrial Science and Technology (AIST) @@ -194,8 +194,8 @@ Property value is an integer." 4 unidata-gen-table-symbol "uni-bidi.el" "Unicode bidi class. Property value is one of the following symbols: - L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, - AN, CS, NSM, BN, B, S, WS, ON" + L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI, + EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON" unidata-describe-bidi-class ;; The assignment of default values to blocks of code points ;; follows the file DerivedBidiClass.txt from the Unicode @@ -205,7 +205,8 @@ Property value is one of the following symbols: (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) ;; The order of elements must be in sync with bidi_type_t in ;; src/dispextern.h. - (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) + (L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI + ES ET CS NSM S WS ON)) (decomposition 5 unidata-gen-table-decomposition "uni-decomposition.el" "Unicode decomposition mapping. @@ -397,12 +398,17 @@ is the character itself."))) ;; If VAL is one of VALn, just return n. ;; Otherwise, VAL-LIST is modified to this: ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) +;; +;; WARN is an optional warning to display when the value list is +;; extended, for property values that need to be in sync with other +;; parts of Emacs; currently only used for bidi-class. -(defun unidata-encode-val (val-list val) +(defun unidata-encode-val (val-list val &optional warn) (let ((slot (assoc val val-list)) val-code) (if slot (cdr slot) + (if warn (message warn val)) (setq val-code (length val-list)) (nconc val-list (list (cons val val-code))) val-code))) @@ -413,6 +419,16 @@ is the character itself."))) (let ((table (make-char-table 'char-code-property-table)) (prop-idx (unidata-prop-index prop)) (vec (make-vector 128 0)) + ;; When this warning is printed, there's a need to make the + ;; following changes: + ;; (1) update unidata-prop-alist with the new bidi-class values; + ;; (2) extend bidi_type_t enumeration on src/dispextern.h to + ;; include the new classes; + ;; (3) possibly update the assertion in bidi.c:bidi_check_type; and + ;; (4) possibly update the switch cases in + ;; bidi.c:bidi_get_type and bidi.c:bidi_get_category. + (bidi-warning "\ +** Found new bidi-class '%s', please update bidi.c and dispextern.h") tail elt range val val-code idx slot prev-range-data) (setq val-list (cons nil (copy-sequence val-list))) @@ -438,7 +454,9 @@ is the character itself."))) (setq elt (car tail) tail (cdr tail)) (setq range (car elt) val (funcall val-func (nth prop-idx elt))) - (setq val-code (if val (unidata-encode-val val-list val))) + (setq val-code (if val (unidata-encode-val val-list val + (and (eq prop 'bidi-class) + bidi-warning)))) (if (consp range) (when val-code (set-char-table-range table range val-code) @@ -486,7 +504,9 @@ is the character itself."))) (setq new-val (funcall val-func (nth prop-idx elt))) (if (not (eq val new-val)) (setq val new-val - val-code (if val (unidata-encode-val val-list val)))) + val-code (if val (unidata-encode-val + val-list val (and (eq prop 'bidi-class) + bidi-warning))))) (if val-code (aset vec (- range start) val-code)) (setq tail (cdr tail))) @@ -660,7 +680,7 @@ is the character itself."))) ;; Return a name of CHAR. VAL is the current value of (aref TABLE ;; CHAR). -(defun unidata-get-name (char val table) +(fset 'unidata-get-name '(lambda (char val table) (cond ((stringp val) (if (> (aref val 0) 0) @@ -742,15 +762,15 @@ is the character itself."))) ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH) (format "%s-%04X" sym char)) ((eq sym 'VARIATION\ SELECTOR) - (format "%s-%d" sym (+ (- char #xe0100) 17)))))))) + (format "%s-%d" sym (+ (- char #xe0100) 17))))))))) ;; Store VAL as the name of CHAR in TABLE. -(defun unidata-put-name (char val table) +(fset 'unidata-put-name '(lambda (char val table) (let ((current-val (aref table char))) (if (and (stringp current-val) (= (aref current-val 0) 0)) (funcall (char-table-extra-slot table 1) char current-val table)) - (aset table char val))) + (aset table char val)))) (defun unidata-get-decomposition (char val table) (cond @@ -964,7 +984,14 @@ is the character itself."))) (l nil) (idx 0) c) - (if (= len 0) + (if (or (= len 0) + ;; Unicode Standard, paragraph 4.8: "For all other + ;; Unicode code points of all other types (Control, + ;; Private-Use, Surrogate, Noncharacter, and Reserved), + ;; the value of the Name property is the null string." + ;; We already handle elsewhere all the characters except + ;; Cc, Control characters, which are handled here. + (string= str "")) nil (dotimes (i len) (setq c (aref str i)) @@ -977,15 +1004,9 @@ is the character itself."))) idx (1+ i))))) (nreverse (cons (intern (substring str idx)) l)))))) -(defun unidata--ensure-compiled (&rest funcs) - (dolist (fun funcs) - (or (byte-code-function-p (symbol-function fun)) - (byte-compile fun)))) - (defun unidata-gen-table-name (prop &rest ignore) (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name)) (word-tables (char-table-extra-slot table 4))) - (unidata--ensure-compiled 'unidata-get-name 'unidata-put-name) (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-name)) (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-name)) @@ -1023,8 +1044,6 @@ is the character itself."))) (defun unidata-gen-table-decomposition (prop &rest ignore) (let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition)) (word-tables (char-table-extra-slot table 4))) - (unidata--ensure-compiled 'unidata-get-decomposition - 'unidata-put-decomposition) (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-decomposition)) (set-char-table-extra-slot table 2 @@ -1034,7 +1053,8 @@ is the character itself."))) -(defun unidata-describe-general-category (val) +(fset + 'unidata-describe-general-category '(lambda (val) (cdr (assq val '((nil . "Uknown") (Lu . "Letter, Uppercase") @@ -1066,9 +1086,9 @@ is the character itself."))) (Cf . "Other, Format") (Cs . "Other, Surrogate") (Co . "Other, Private Use") - (Cn . "Other, Not Assigned"))))) + (Cn . "Other, Not Assigned")))))) -(defun unidata-describe-canonical-combining-class (val) +(fset 'unidata-describe-canonical-combining-class '(lambda (val) (cdr (assq val '((0 . "Spacing, split, enclosing, reordrant, and Tibetan subjoined") (1 . "Overlays and interior") @@ -1095,9 +1115,9 @@ is the character itself."))) (232 . "Above right") (233 . "Double below") (234 . "Double above") - (240 . "Below (iota subscript)"))))) + (240 . "Below (iota subscript)")))))) -(defun unidata-describe-bidi-class (val) +(fset 'unidata-describe-bidi-class '(lambda (val) (cdr (assq val '((L . "Left-to-Right") (LRE . "Left-to-Right Embedding") @@ -1107,6 +1127,10 @@ is the character itself."))) (RLE . "Right-to-Left Embedding") (RLO . "Right-to-Left Override") (PDF . "Pop Directional Format") + (LRI . "Left-to-Right Isolate") + (RLI . "Right-to-Left Isolate") + (FSI . "First Strong Isolate") + (PDI . "Pop Directional Isolate") (EN . "European Number") (ES . "European Number Separator") (ET . "European Number Terminator") @@ -1117,7 +1141,7 @@ is the character itself."))) (B . "Paragraph Separator") (S . "Segment Separator") (WS . "Whitespace") - (ON . "Other Neutrals"))))) + (ON . "Other Neutrals")))))) (defun unidata-describe-decomposition (val) (mapconcat @@ -1218,6 +1242,7 @@ is the character itself."))) (insert (format "(define-char-code-property '%S %S\n %S)\n" prop basename docstring)) (with-temp-buffer + (message "Using load-path %s" load-path) (message "Generating %s..." file) (when (file-exists-p file) (insert-file-contents file) @@ -1226,7 +1251,6 @@ is the character itself."))) (setq table (funcall generator prop default-value val-list)) (when describer (unless (subrp (symbol-function describer)) - (unidata--ensure-compiled describer) (setq describer (symbol-function describer))) (set-char-table-extra-slot table 3 describer)) (if (bobp)