X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/dbc3b08c405a7b1c0ddb0fb0c98164b355802af5..8d64207425f5345576b77dc9308943ec56ad9327:/admin/unidata/unidata-gen.el diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 75d7e2d98f..9fee8e46c8 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -1,14 +1,14 @@ ;; unidata-gen.el -- Create files containing character property data. -;; Copyright (C) 2005 +;; Copyright (C) 2005, 2006, 2007, 2008, 2009 ;; National Institute of Advanced Industrial Science and Technology (AIST) ;; Registration Number H13PRO009 ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,9 +16,7 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -40,8 +38,11 @@ ;; (name, generic-category, etc), and FILE is a name of one of ;; the following files. ;; -;; uni-name.el, uni-cat.el, uni-comb.el, uni-bidi.el -;; It contains a single form of this format: +;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el, +;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el, +;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el, +;; uni-lowercase.el, uni-titlecase.el +;; They each contain a single form of this format: ;; (char-code-property-register PROP CHAR-TABLE) ;; where PROP is the same as above, and CHAR-TABLE is a ;; char-table containing property values in a compressed format. @@ -165,9 +166,8 @@ Property value is one of the following symbols: "Unicode decomposition mapping. Property value is a list of characters. The first element may be one of these symbols representing compatibility formatting tag: - , , , , , , , - , , , , , , , , - " + font, noBreak, initial, medial, final, isolated, circle, super, + sub, vertical, wide, narrow, small, square, fraction, compat" unidata-describe-decomposition) (decimal-digit-value 6 unidata-gen-table-integer "uni-decimal.el" @@ -178,9 +178,9 @@ Property value is an integer.") "Unicode numeric value (digit). Property value is an integer.") (numeric-value - 8 unidata-gen-table-symbol "uni-numeric.el" + 8 unidata-gen-table-numeric "uni-numeric.el" "Unicode numeric value (numeric). -Property value is an symbol.") +Property value is an integer or a floating point.") (mirrored 9 unidata-gen-table-symbol "uni-mirrored.el" "Unicode bidi mirrored flag. @@ -255,7 +255,7 @@ Property value is a character." (defun unidata-put-character (char val table) (or (characterp val) (not val) - (error "Not an character nor nil: %S" val)) + (error "Not a character nor nil: %S" val)) (let ((current-val (aref table char))) (unless (eq current-val val) (if (stringp current-val) @@ -364,7 +364,7 @@ Property value is a character." this-val)) ((> val 0) (aref val-table (1- val)))))) - + ;; Return a integer-type character property value of CHAR. VAL is the ;; current value of (aref TABLE CHAR). @@ -392,6 +392,34 @@ Property value is a character." (setq first-char (1+ first-char)))) this-val))))) +;; Return a numeric-type (integer or float) character property value +;; of CHAR. VAL is the current value of (aref TABLE CHAR). + +(defun unidata-get-numeric (char val table) + (cond + ((numberp val) + val) + ((stringp val) + (let ((val-table (char-table-extra-slot table 4)) + (first-char (lsh (lsh char -7) 7)) + (str val) + (len (length val)) + (idx 0) + this-val count) + (while (< idx len) + (setq val (aref str idx) idx (1+ idx) + count (if (< idx len) (aref str idx) 1)) + (setq val (and (> val 0) (aref val-table (1- val))) + count (if (< count 128) + 1 + (prog1 (- count 128) (setq idx (1+ idx))))) + (dotimes (i count) + (aset table first-char val) + (if (= first-char char) + (setq this-val val)) + (setq first-char (1+ first-char)))) + this-val)))) + ;; Store VAL (symbol) as a character property value of CHAR in TABLE. (defun unidata-put-symbol (char val table) @@ -415,6 +443,19 @@ Property value is a character." (funcall (char-table-extra-slot table 1) char current-val table)) (aset table char val)))) +;; Store VAL (integer or float) as a character property value of CHAR +;; in TABLE. + +(defun unidata-put-numeric (char val table) + (or (numberp val) + (not val) + (error "Not a number nor nil: %S" val)) + (let ((current-val (aref table char))) + (unless (equal current-val val) + (if (stringp current-val) + (funcall (char-table-extra-slot table 1) char current-val table)) + (aset table char val)))) + ;; Encode the character property value VAL into an integer value by ;; VAL-LIST. By side effect, VAL-LIST is modified. ;; VAL-LIST has this form: @@ -424,7 +465,7 @@ Property value is a character." ;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) (defun unidata-encode-val (val-list val) - (let ((slot (assq val val-list)) + (let ((slot (assoc val val-list)) val-code) (if slot (cdr slot) @@ -492,7 +533,7 @@ Property value is a character." (set-char-table-range table (cons start limit) str)))))) (setq val-list (nreverse (cdr val-list))) - (set-char-table-extra-slot table 0 prop) + (set-char-table-extra-slot table 0 prop) (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list))) table)) @@ -518,6 +559,22 @@ Property value is a character." (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer)) table)) +(defun unidata-gen-table-numeric (prop) + (let ((table (unidata-gen-table prop + #'(lambda (x) + (if (string-match "/" x) + (/ (float (string-to-number x)) + (string-to-number + (substring x (match-end 0)))) + (if (> (length x) 0) + (string-to-number x)))) + t))) + (byte-compile 'unidata-get-numeric) + (byte-compile 'unidata-put-numeric) + (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric)) + (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric)) + table)) + ;; WORD-LIST TABLE @@ -556,7 +613,7 @@ Property value is a character." (while (and l1 (eq (car l1) (car l2))) (setq beg (1+ beg) l1 (cdr l1) len1 (1- len1) l2 (cdr l2) len2 (1- len2))) - (while (and (< end len1) (< end len2) + (while (and (< end len1) (< end len2) (eq (nth (- len1 end 1) l1) (nth (- len2 end 1) l2))) (setq end (1+ end)))) (if (= (+ beg end) 0) @@ -628,7 +685,7 @@ Property value is a character." ;; CHAR). (defun unidata-get-name (char val table) - (cond + (cond ((stringp val) (if (> (aref val 0) 0) val @@ -659,7 +716,7 @@ Property value is a character." (setq word-list (nconc word-list (list (car l))) l (cdr l)))))) (setq word-list - (nconc word-list + (nconc word-list (list (symbol-name (unidata-decode-word c word-table)))) i (1+ i)))) @@ -698,7 +755,7 @@ Property value is a character." (V (/ (% char 588) 28)) ;; TIndex = SIndex % TCount (T (% char 28))) - (format "HANGUL SYLLABLE %s%s%s" + (format "HANGUL SYLLABLE %s%s%s" ;; U+110B is nil in this table. (or (aref (aref jamo-name-table 0) L) "") (aref (aref jamo-name-table 1) V) @@ -754,7 +811,7 @@ Property value is a character." (setq word-list (nconc word-list (list (car l))) l (cdr l)))))) (setq word-list - (nconc word-list + (nconc word-list (list (or (unidata-decode-word c word-table) c))) i (1+ i)))) (if (or word-list tail-list) @@ -771,11 +828,13 @@ Property value is a character." (L (+ #x1100 (/ char 588))) ;; V = VBase + (SIndex % NCount) * TCount (V (+ #x1161 (/ (% char 588) 28))) + ;; LV = SBase + (SIndex / TCount) * TCount + (LV (+ #xAC00 (* (/ char 28) 28))) ;; T = TBase + SIndex % TCount (T (+ #x11A7 (% char 28)))) (if (= T #x11A7) (list L V) - (list L V T)))) + (list LV T)))) )) @@ -933,7 +992,7 @@ Property value is a character." (if (= c 32) (setq l (cons (intern (substring str idx i)) l) idx (1+ i)) - (if (and (= c ?-) (< idx i) + (if (and (= c ?-) (< idx i) (< (1+ i) len) (/= (aref str (1+ i)) 32)) (setq l (cons '- (cons (intern (substring str idx i)) l)) idx (1+ i))))) @@ -969,11 +1028,11 @@ Property value is a character." (setq c (aref str i)) (if (= c 32) (setq l (if (= (aref str idx) ?<) - (cons (intern (substring str idx i)) l) + (cons (intern (substring str (1+ idx) (1- i))) l) (cons (string-to-number (substring str idx i) 16) l)) idx (1+ i)))) (if (= (aref str idx) ?<) - (setq l (cons (intern (substring str idx len)) l)) + (setq l (cons (intern (substring str (1+ idx) (1- len))) l)) (setq l (cons (string-to-number (substring str idx len) 16) l))) (nreverse l))))) @@ -985,7 +1044,7 @@ Property value is a character." (byte-compile 'unidata-put-decomposition) (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-decomposition)) - (set-char-table-extra-slot table 2 + (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-decomposition)) (set-char-table-extra-slot table 4 (car word-tables)) table)) @@ -1077,8 +1136,13 @@ Property value is a character." (ON . "Other Neutrals"))))) (defun unidata-describe-decomposition (val) - (mapconcat #'(lambda (x) (if (symbolp x) (symbol-name x) (string ?' x ?'))) - val " ")) + (mapconcat + #'(lambda (x) + (if (symbolp x) (symbol-name x) + (concat (string ?') + (compose-string (string x) 0 1 (string ?\t x ?\t)) + (string ?')))) + val " ")) ;; Verify if we can retrieve correct values from the generated ;; char-tables. @@ -1088,7 +1152,7 @@ Property value is a character." (let* ((prop (car elt)) (index (unidata-prop-index prop)) (generator (unidata-prop-generator prop)) - (table (progn + (table (progn (message "Generating %S table..." prop) (funcall generator prop))) (decoder (char-table-extra-slot table 1)) @@ -1114,7 +1178,7 @@ Property value is a character." (message "%S %04X" prop check) (setq check (+ check #x400))) (or (equal val1 val2) - (insert (format "> %04X %S\n< %04X %S\n" + (insert (format "> %04X %S\n< %04X %S\n" char val1 char val2))) (sit-for 0))))))) @@ -1126,7 +1190,7 @@ Property value is a character." (setq unidata-text-file (car command-line-args-left) command-line-args-left (cdr command-line-args-left))) (unidata-setup-list unidata-text-file) - (let ((coding-system-for-write 'utf-8) + (let ((coding-system-for-write 'utf-8-unix) (charprop-file "charprop.el")) (with-temp-file charprop-file (insert ";; Automatically generated by unidata-gen.el.\n") @@ -1150,7 +1214,10 @@ Property value is a character." (byte-compile describer) (setq describer (symbol-function describer))) (set-char-table-extra-slot table 3 describer)) - (insert ";; Automatically generated from UnicodeData.txt.\n" + (insert ";; Copyright (C) 1991-2009 Unicode, Inc. +;; This file was generated from the Unicode data file at +;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt. +;; See lisp/international/README for the copyright and permission notice.\n" (format "(define-char-code-property '%S %S %S)\n" prop table docstring) ";; Local Variables:\n"