;;; ucs-normalize.el --- Unicode normalization NFC/NFD/NFKD/NFKC
-;; Copyright (C) 2009
+;; Copyright (C) 2009, 2010
;; Free Software Foundation, Inc.
;; Author: Taichi Kawabata <kawabata.taichi@gmail.com>
;;; Commentary:
;;
-;; This program has passed the NormalizationTest-5.1.0.txt.
+;; This program has passed the NormalizationTest-5.2.0.txt.
;;
;; References:
;; http://www.unicode.org/reports/tr15/
;; with previous character, then the beginning of the block is
;; the searched character. If searched character is combining
;; character, then previous character will be the target
-;; character
-;; (2) end of the block
+;; character
+;; (2) end of the block
;; Block ends at non-composable starter character.
;;
;; C. Decomposition (`ucs-normalize-block')
;;; Code:
-(defconst ucs-normalize-version "1.1beta2")
+(defconst ucs-normalize-version "1.2")
(eval-when-compile (require 'cl))
+(declare-function nfd "ucs-normalize" (char))
+
(eval-when-compile
(defconst ucs-normalize-composition-exclusions
#x1D1BF #x1D1C0)
"Composition Exclusion List.
This list is taken from
- http://www.unicode.org/Public/UNIDATA/CompositionExclusions-5.1.0.txt")
+ http://www.unicode.org/Public/UNIDATA/5.2/CompositionExclusions.txt")
;; Unicode ranges that decompositions & combinings are defined.
(defvar check-range nil)
- (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x10fff) (#x1d000 . #x1dfff) (#x2f800 . #x2faff)))
+ (setq check-range '((#x00a0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x110ff) (#x1d000 . #x1dfff) (#x1f100 . #x1f2ff) (#x2f800 . #x2faff)))
;; Basic normalization functions
(defun nfd (char)
(eval-when-compile
- (defvar combining-chars nil)
+ (defvar combining-chars nil)
(setq combining-chars nil)
(defvar decomposition-pair-to-composition nil)
(setq decomposition-pair-to-composition nil)
check-range))
(setq combining-chars
- (append combining-chars
+ (append combining-chars
'(?ᅡ ?ᅢ ?ᅣ ?ᅤ ?ᅥ ?ᅦ ?ᅧ ?ᅨ ?ᅩ ?ᅪ
- ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
+ ?ᅫ ?ᅬ ?ᅭ ?ᅮ ?ᅯ ?ᅰ ?ᅱ ?ᅲ ?ᅳ ?ᅴ ?ᅵ
?ᆨ ?ᆩ ?ᆪ ?ᆫ ?ᆬ ?ᆭ ?ᆮ ?ᆯ ?ᆰ ?ᆱ ?ᆲ ?ᆳ ?ᆴ
?ᆵ ?ᆶ ?ᆷ ?ᆸ ?ᆹ ?ᆺ ?ᆻ ?ᆼ ?ᆽ ?ᆾ ?ᆿ ?ᇀ ?ᇁ ?ᇂ)))
)
(setq ucs-normalize-combining-chars-regexp
(eval-when-compile (concat (regexp-opt (mapcar 'char-to-string combining-chars)) "+")))
+(declare-function decomposition-translation-alist "ucs-normalize"
+ (decomposition-function))
+(declare-function decomposition-char-recursively "ucs-normalize"
+ (char decomposition-function))
+(declare-function alist-list-to-vector "ucs-normalize" (alist))
+
(eval-when-compile
(defun decomposition-translation-alist (decomposition-function)
(if decomposition
(setq alist (cons (cons char
(apply 'append
- (mapcar (lambda (x)
+ (mapcar (lambda (x)
(decomposition-char-recursively
x decomposition-function))
decomposition)))
(let ((decomposition (funcall decomposition-function char)))
(if decomposition
(apply 'append
- (mapcar (lambda (x)
+ (mapcar (lambda (x)
(decomposition-char-recursively x decomposition-function))
decomposition))
(list char))))
(setq ucs-normalize-hangul-translation-alist
(let ((i 0) entries)
(while (< i 11172)
- (setq entries
- (cons (cons (+ #xac00 i)
+ (setq entries
+ (cons (cons (+ #xac00 i)
(if (= 0 (% i 28))
(vector (+ #x1100 (/ i 588))
(+ #x1161 (/ (% i 588) 28)))
i (1+ i))) entries))
(defun ucs-normalize-make-translation-table-from-alist (alist)
- (make-translation-table-from-alist
+ (make-translation-table-from-alist
(append alist ucs-normalize-hangul-translation-alist)))
(define-translation-table 'ucs-normalize-nfd-table
(ucs-normalize-make-translation-table-from-alist (eval-when-compile hfs-nfd-alist)))
(defun ucs-normalize-sort (chars)
- "Sort by canonical combining class of chars."
+ "Sort by canonical combining class of CHARS."
(sort chars
(lambda (ch1 ch2)
(< (ucs-normalize-ccc ch1) (ucs-normalize-ccc ch2)))))
chars)))
)
+(declare-function quick-check-list "ucs-normalize"
+ (decomposition-translation &optional composition-predicate))
+(declare-function quick-check-list-to-regexp "ucs-normalize" (quick-check-list))
+
(eval-when-compile
(defun quick-check-list (decomposition-translation
&optional composition-predicate)
"Quick-Check List for DECOMPOSITION-TRANSLATION and COMPOSITION-PREDICATE.
It includes Singletons, CompositionExclusions, and Non-Starter
-decomposition. "
+decomposition."
(let (entries decomposition composition)
(mapc
(lambda (start-end)
(do ((i (car start-end) (+ i 1))) ((> i (cdr start-end)))
(setq decomposition
(string-to-list
- (with-temp-buffer
+ (with-temp-buffer
(insert i)
(translate-region 1 2 decomposition-translation)
(buffer-string))))
(save-excursion
(save-restriction
(narrow-to-region (point) (+ (point) len))
- (let ((buffer-modified-p (buffer-modified-p)))
- (ucs-normalize-HFS-NFC-region (point-min) (point-max))
- (- (point-max) (point-min))))))
+ (ucs-normalize-HFS-NFC-region (point-min) (point-max))
+ (- (point-max) (point-min)))))
+
+;; Pre-write conversion for `utf-8-hfs'.
+(defun ucs-normalize-hfs-nfd-pre-write-conversion (from to)
+ (let ((old-buf (current-buffer)))
+ (set-buffer (generate-new-buffer " *temp*"))
+ (if (stringp from)
+ (insert from)
+ (insert-buffer-substring old-buf from to))
+ (ucs-normalize-HFS-NFD-region (point-min) (point-max))
+ nil))
;;; coding-system definition
(define-coding-system 'utf-8-hfs
- "UTF-8 base coding system with normalization on decoding.
+ "UTF-8 based coding system for MacOS HFS file names.
The singleton characters in HFS normalization exclusion will not
-be decomposed. It doesn't perform normalization on encoding."
+be decomposed."
:coding-type 'utf-8
:mnemonic ?U
:charset-list '(unicode)
- ;; :decode-translation-table (not necessary)
:post-read-conversion 'ucs-normalize-hfs-nfd-post-read-conversion
- ;; NFD encoder is not necessary because MacOS will automatically do it.
- ;; :encode-translation-table 'nfd-encode-translation-table
- ;; :pre-write-conversion 'nfd-encode-function
+ :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion
)
(provide 'ucs-normalize)
+;; Local Variables:
+;; coding: utf-8
+;; End:
+
;; arch-tag: cef65ae7-71ad-4e19-8da8-56ab4d42aaa4
;;; ucs-normalize.el ends here