X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/86a0ed4c65e8335e26cc1b5380cecd3c36710adc..114f9c96795aff3b51b9060d7c9c1b77debcc99a:/lisp/international/ucs-normalize.el diff --git a/lisp/international/ucs-normalize.el b/lisp/international/ucs-normalize.el index c879861812..5061e50058 100644 --- a/lisp/international/ucs-normalize.el +++ b/lisp/international/ucs-normalize.el @@ -1,6 +1,6 @@ ;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC -;; Copyright (C) 2009 +;; Copyright (C) 2009, 2010 ;; Free Software Foundation, Inc. ;; Author: Taichi Kawabata @@ -23,7 +23,7 @@ ;;; Commentary: ;; -;; This program has passed the NormalizationTest-5.1.0.txt. +;; This program has passed the NormalizationTest-5.2.0.txt. ;; ;; References: ;; http://www.unicode.org/reports/tr15/ @@ -88,8 +88,8 @@ ;; with previous character, then the beginning of the block is ;; the searched character. If searched character is combining ;; character, then previous character will be the target -;; character -;; (2) end of the block +;; character +;; (2) end of the block ;; Block ends at non-composable starter character. ;; ;; C. Decomposition (`ucs-normalize-block') @@ -108,10 +108,12 @@ ;;; Code: -(defconst ucs-normalize-version "1.1beta2") +(defconst ucs-normalize-version "1.2") (eval-when-compile (require 'cl)) +(declare-function nfd "ucs-normalize" (char)) + (eval-when-compile (defconst ucs-normalize-composition-exclusions @@ -128,11 +130,11 @@ #x1D1BF #x1D1C0) "Composition Exclusion List. This list is taken from - http://www.unicode.org/Public/UNIDATA/CompositionExclusions-5.1.0.txt") + http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt") ;; Unicode ranges that decompositions & combinings are defined. (defvar check-range nil) - (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x10fff) (#x1d000 . #x1dfff) (#x2f800 . #x2faff))) + (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff))) ;; Basic normalization functions (defun nfd (char) @@ -166,7 +168,7 @@ (eval-when-compile - (defvar combining-chars nil) + (defvar combining-chars nil) (setq combining-chars nil) (defvar decomposition-pair-to-composition nil) (setq decomposition-pair-to-composition nil) @@ -199,9 +201,9 @@ check-range)) (setq combining-chars - (append combining-chars + (append combining-chars '(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ - ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ + ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ ?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ ?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ))) ) @@ -251,6 +253,12 @@ Note that Hangul are excluded.") (setq ucs-normalize-combining-chars-regexp (eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars)) "+"))) +(declare-function decomposition-translation-alist "ucs-normalize" + (decomposition-function)) +(declare-function decomposition-char-recursively "ucs-normalize" + (char decomposition-function)) +(declare-function alist-list-to-vector "ucs-normalize" (alist)) + (eval-when-compile (defun decomposition-translation-alist (decomposition-function) @@ -262,7 +270,7 @@ Note that Hangul are excluded.") (if decomposition (setq alist (cons (cons char (apply 'append - (mapcar (lambda (x) + (mapcar (lambda (x) (decomposition-char-recursively x decomposition-function)) decomposition))) @@ -274,7 +282,7 @@ Note that Hangul are excluded.") (let ((decomposition (funcall decomposition-function char))) (if decomposition (apply 'append - (mapcar (lambda (x) + (mapcar (lambda (x) (decomposition-char-recursively x decomposition-function)) decomposition)) (list char)))) @@ -295,8 +303,8 @@ Note that Hangul are excluded.") (setq ucs-normalize-hangul-translation-alist (let ((i 0) entries) (while (< i 11172) - (setq entries - (cons (cons (+ #xac00 i) + (setq entries + (cons (cons (+ #xac00 i) (if (= 0 (% i 28)) (vector (+ #x1100 (/ i 588)) (+ #x1161 (/ (% i 588) 28))) @@ -307,7 +315,7 @@ Note that Hangul are excluded.") i (1+ i))) entries)) (defun ucs-normalize-make-translation-table-from-alist (alist) - (make-translation-table-from-alist + (make-translation-table-from-alist (append alist ucs-normalize-hangul-translation-alist))) (define-translation-table 'ucs-normalize-nfd-table @@ -318,7 +326,7 @@ Note that Hangul are excluded.") (ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist))) (defun ucs-normalize-sort (chars) - "Sort by canonical combining class of chars." + "Sort by canonical combining class of CHARS." (sort chars (lambda (ch1 ch2) (< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2))))) @@ -364,20 +372,24 @@ If COMPOSITION-PREDICATE is not given, then do nothing." chars))) ) +(declare-function quick-check-list "ucs-normalize" + (decomposition-translation &optional composition-predicate)) +(declare-function quick-check-list-to-regexp "ucs-normalize" (quick-check-list)) + (eval-when-compile (defun quick-check-list (decomposition-translation &optional composition-predicate) "Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE. It includes Singletons, CompositionExclusions, and Non-Starter -decomposition. " +decomposition." (let (entries decomposition composition) (mapc (lambda (start-end) (do ((i (car start-end) (+ i 1))) ((> i (cdr start-end))) (setq decomposition (string-to-list - (with-temp-buffer + (with-temp-buffer (insert i) (translate-region 1 2 decomposition-translation) (buffer-string)))) @@ -585,26 +597,36 @@ COMPOSITION-PREDICATE will be used to compose region." (save-excursion (save-restriction (narrow-to-region (point) (+ (point) len)) - (let ((buffer-modified-p (buffer-modified-p))) - (ucs-normalize-HFS-NFC-region (point-min) (point-max)) - (- (point-max) (point-min)))))) + (ucs-normalize-HFS-NFC-region (point-min) (point-max)) + (- (point-max) (point-min))))) + +;; Pre-write conversion for `utf-8-hfs'. +(defun ucs-normalize-hfs-nfd-pre-write-conversion (from to) + (let ((old-buf (current-buffer))) + (set-buffer (generate-new-buffer " *temp*")) + (if (stringp from) + (insert from) + (insert-buffer-substring old-buf from to)) + (ucs-normalize-HFS-NFD-region (point-min) (point-max)) + nil)) ;;; coding-system definition (define-coding-system 'utf-8-hfs - "UTF-8 base coding system with normalization on decoding. + "UTF-8 based coding system for MacOS HFS file names. The singleton characters in HFS normalization exclusion will not -be decomposed. It doesn't perform normalization on encoding." +be decomposed." :coding-type 'utf-8 :mnemonic ?U :charset-list '(unicode) - ;; :decode-translation-table (not necessary) :post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion - ;; NFD encoder is not necessary because MacOS will automatically do it. - ;; :encode-translation-table 'nfd-encode-translation-table - ;; :pre-write-conversion 'nfd-encode-function + :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion ) (provide 'ucs-normalize) +;; Local Variables: +;; coding: utf-8 +;; End: + ;; arch-tag: cef65ae7-71ad-4e19-8da8-56ab4d42aaa4 ;;; ucs-normalize.el ends here