(utf-translate-cjk-mode): Minor mode,
[bpt/emacs.git] / lisp / international / utf-8.el
index e2f4976..f595122 100644 (file)
 ;; Fixme: note that reading and writing invalid utf-8 may not be
 ;; idempotent -- to represent the bytes to fix that needs a new charset.
 ;;
-;; Characters from other character sets can be encoded with
-;; mule-utf-8 by populating the table `ucs-mule-to-mule-unicode' and
-;; registering the translation with `register-char-codings'.  Hash
-;; tables `utf-8-subst-table' and `utf-8-subst-rev-table' are used to
-;; support encoding and decoding of about a quarter of the CJK space
-;; between U+3400 and U+DFFF.
+;; Characters from other character sets can be encoded with mule-utf-8
+;; by populating the translation table
+;; `utf-translation-table-for-encode'.  Hash tables
+;; `utf-subst-table-for-decode' and `utf-subst-table-for-encode' are
+;; used to support encoding and decoding of about a quarter of the CJK
+;; space between U+3400 and U+DFFF.
 
 ;; UTF-8 is defined in RFC 2279.  A sketch of the encoding is:
 
 
 ;;; Code:
 
-(defvar ucs-mule-to-mule-unicode (make-translation-table)
-  "Translation table for encoding to `mule-utf-8'.")
-(define-translation-table 'ucs-mule-to-mule-unicode
-  ucs-mule-to-mule-unicode)
+(defvar ucs-mule-to-mule-unicode (make-char-table 'translation-table nil)
+  "Char table mapping characters to latin-iso8859-1 or mule-unicode-*.
 
-(defvar utf-8-subst-table (make-hash-table :test 'eq))
-(defvar utf-8-subst-rev-table (make-hash-table :test 'eq))
-(define-translation-hash-table 'utf-8-subst-table utf-8-subst-table)
-(define-translation-hash-table 'utf-8-subst-rev-table utf-8-subst-rev-table)
+If `unify-8859-on-encoding-mode' is non-nil, this table populates the
+translation-table named `utf-translation-table-for-encode'.")
+
+(define-translation-table 'utf-translation-table-for-encode)
 
-(defvar utf-8-translation-table-for-decode (make-translation-table)
-  "Translation table applied after decoding utf-8 to mule-unicode.
-This is only actually applied to characters which would normally be
-decoded into mule-unicode-0100-24ff.")
-(define-translation-table 'utf-8-translation-table-for-decode
-  utf-8-translation-table-for-decode)
 
 ;; Map Cyrillic and Greek to iso-8859 charsets, which take half the
 ;; space of mule-unicode.  For Latin scripts this isn't very
 ;; important.  Hebrew and Arabic might go here too when there's proper
 ;; support for them.
-(defvar utf-8-fragmentation-table (make-translation-table)
-  "Char table normally mapping non-Latin mule-unicode-... characters to iso8859.
-Used as the value of `utf-8-translation-table-for-decode' in
-`utf-8-fragment-on-decoding' mode.")
+
+(defvar utf-fragmentation-table (make-char-table 'translation-table nil)
+  "Char-table normally mapping non-Latin mule-unicode-* chars to iso-8859-*.
+
+If `utf-fragment-on-decoding' is non-nil, this table populates the
+translation-table named `utf-translation-table-for-decode'.")
+
+(defvar utf-defragmentation-table (make-char-table 'translation-table nil)
+  "Char-table for reverse mapping of `utf-fragmentation-table'.
+
+If `utf-fragment-on-decoding' is non-nil and
+`unify-8859-on-encoding-mode' is nil, this table populates the
+translation-table named `utf-translation-table-for-encode'.")
+
+(define-translation-table 'utf-translation-table-for-decode)
+
+
+(defvar ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
+  "Hash table mapping Emacs CJK character sets to Unicode code points.
+
+If `utf-translate-cjk-mode' is non-nil, this table populates the
+translation-hash-table named `utf-subst-table-for-encode'.")
+
+(define-translation-hash-table 'utf-subst-table-for-encode
+  ucs-mule-cjk-to-unicode)
+
+(defvar ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)
+  "Hash table mapping Unicode code points to Emacs CJK character sets.
+
+If `utf-translate-cjk-mode' is non-nil, this table populates the
+translation-hash-table named `utf-subst-table-for-decode'.")
+
+(define-translation-hash-table 'utf-subst-table-for-decode
+  ucs-unicode-to-mule-cjk)
+
 (mapc
  (lambda (pair)
-   (aset utf-8-fragmentation-table (car pair) (cdr pair)))
+   (aset utf-fragmentation-table (car pair) (cdr pair))
+   (aset utf-defragmentation-table (cdr pair) (car pair)))
  '((?\e$,1&d\e(B . ?\e,F4\e(B) (?\e$,1&e\e(B . ?\e,F5\e(B) (?\e$,1&f\e(B . ?\e,F6\e(B) (?\e$,1&h\e(B . ?\e,F8\e(B) (?\e$,1&i\e(B . ?\e,F9\e(B)
    (?\e$,1&j\e(B . ?\e,F:\e(B) (?\e$,1&l\e(B . ?\e,F<\e(B) (?\e$,1&n\e(B . ?\e,F>\e(B) (?\e$,1&o\e(B . ?\e,F?\e(B) (?\e$,1&p\e(B . ?\e,F@\e(B)
    (?\e$,1&q\e(B . ?\e,FA\e(B) (?\e$,1&r\e(B . ?\e,FB\e(B) (?\e$,1&s\e(B . ?\e,FC\e(B) (?\e$,1&t\e(B . ?\e,FD\e(B) (?\e$,1&u\e(B . ?\e,FE\e(B)
@@ -128,8 +152,9 @@ Used as the value of `utf-8-translation-table-for-decode' in
    (?\e$,1(w\e(B . ?\e,Lw\e(B) (?\e$,1(x\e(B . ?\e,Lx\e(B) (?\e$,1(y\e(B . ?\e,Ly\e(B) (?\e$,1(z\e(B . ?\e,Lz\e(B) (?\e$,1({\e(B . ?\e,L{\e(B)
    (?\e$,1(|\e(B . ?\e,L|\e(B) (?\e$,1(~\e(B . ?\e,L~\e(B) (?\e$,1(\7f\e(B . ?\e,L\7f\e(B)))
 
-(defcustom utf-8-fragment-on-decoding nil
-  "Whether or not to decode some scripts in UTF-8 text into iso8859 charsets.
+
+(defcustom utf-fragment-on-decoding nil
+  "Whether or not to decode some chars in UTF-8/16 text into iso8859 charsets.
 Setting this means that the relevant Cyrillic and Greek characters are
 decoded into the iso8859 charsets rather than into
 mule-unicode-0100-24ff.  The iso8859 charsets take half as much space
@@ -140,44 +165,99 @@ for mechanisms to make this largely transparent.
 
 Setting this variable outside customize has no effect."
   :set (lambda (s v)
-        (setq utf-8-translation-table-for-decode
-              (if v
-                  utf-8-fragmentation-table
-                (make-char-table)))
-        (define-translation-table 'utf-8-translation-table-for-decode
-          utf-8-translation-table-for-decode)
+        (if v
+            (progn
+              (define-translation-table 'utf-translation-table-for-decode
+                utf-fragmentation-table)
+              ;; Even if unify-8859-on-encoding-mode is off, make
+              ;; mule-utf-* encode characters in
+              ;; utf-fragmentation-table.
+              (unless (eq (get 'utf-translation-table-for-encode
+                               'translation-table)
+                          ucs-mule-to-mule-unicode)
+                (define-translation-table 'utf-translation-table-for-encode
+                  utf-defragmentation-table)))
+          (define-translation-table 'utf-translation-table-for-decode)
+          ;; When unify-8859-on-encoding-mode is off, be sure to make
+          ;; mule-utf-* disabled for characters in
+          ;; utf-fragmentation-table.
+          (unless (eq (get 'utf-translation-table-for-encode
+                           'translation-table)
+                      ucs-mule-to-mule-unicode)
+            (define-translation-table 'utf-translation-table-for-encode)))
         (set-default s v))
   :version "21.4"
   :type 'boolean
   :group 'mule)
 
-(defcustom utf-8-translate-cjk nil
-  "Whether the `mule-utf-8' coding system should encode many CJK characters.
-
-Enabling this loads tables which enable the coding system to encode
-characters in the charsets `korean-ksc5601', `chinese-gb2312' and
-`japanese-jisx0208', and to decode the corresponding unicodes into
-such characters.  This works by loading the library `utf-8-subst'; see
-its commentary.  The tables are fairly large (about 33000 entries), so this
-option is not the default."
-  :link '(emacs-commentary-link "utf-8-subst")
-  :set (lambda (s v)
-        (when v
-          (require 'utf-8-subst)
-          (let ((table (make-char-table 'translation-table)))
-            (coding-system-put 'mule-utf-8 'safe-charsets
-                               (append (coding-system-get 'mule-utf-8
-                                                          'safe-charsets)
-                                       '(korean-ksc5601 chinese-gb2312
-                                                        japanese-jisx0208)))
-            (maphash (lambda (k v)
-                       (aset table k v))
-                     utf-8-subst-rev-table)
-            (register-char-codings 'mule-utf-8 table)))
-        (set-default s v))
+(define-minor-mode utf-translate-cjk-mode
+  "Whether the UTF based coding systems should decode/encode CJK characters.
+Enabling this loads tables which allow the coding systems mule-utf-8,
+mule-utf-16-le and mule-utf-16-be to encode characters in the charsets
+`korean-ksc5601', `chinese-gb2312', `chinese-big5-1',
+`chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to
+decode the corresponding unicodes into such characters.
+
+Where the charsets overlap, the one preferred for decoding is chosen
+according to the language environment in effect when this option is
+turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for
+Chinese-Big5 and jisx for other environments.
+
+Disabling the mode installs empty substitution tables again.
+
+The tables are large (over 40000 entries), so this option is not the
+default.  Also, installing them may be rather slow."
+  :init-value nil
   :version "21.4"
   :type 'boolean
-  :group 'mule)
+  :set-after '(current-language-environment)
+  :group 'mule
+  :global t
+  (if utf-translate-cjk-mode
+      ;; Fixme: Allow the use of the CJK charsets to be
+      ;; customized by reordering and possible omission.
+      (progn
+       ;; Redefine them with realistic initial sizes and a
+       ;; smallish rehash size to avoid wasting significant
+       ;; space after they're built.
+       (setq ucs-mule-cjk-to-unicode
+             (make-hash-table :test 'eq :size 43000 :rehash-size 1000)
+             ucs-unicode-to-mule-cjk
+             (make-hash-table :test 'eq :size 43000 :rehash-size 1000))
+       ;; Load the files explicitly, to avoid having to keep
+       ;; around the large tables they contain (as well as the
+       ;; ones which get built).
+       ;; Each branch loads the file of the preferred charset last.
+       ;; NOTE(review): presumably later loads override earlier
+       ;; entries for overlapping code points -- confirm against the
+       ;; subst-* files.
+       (cond
+        ((string= "Korean" current-language-environment)
+         (load "subst-jis")
+         (load "subst-big5")
+         (load "subst-gb2312")
+         (load "subst-ksc"))
+        ((string= "Chinese-BIG5" current-language-environment)
+         (load "subst-jis")
+         (load "subst-ksc")
+         (load "subst-gb2312")
+         (load "subst-big5"))
+        ((string= "Chinese-GB" current-language-environment)
+         (load "subst-jis")
+         (load "subst-ksc")
+         (load "subst-big5")
+         (load "subst-gb2312"))
+        (t
+         (load "subst-ksc")
+         (load "subst-gb2312")
+         (load "subst-big5")
+         (load "subst-jis")))    ; jis covers as much as big5, gb2312
+       ;; Point the named translation hash tables, which the CCL
+       ;; programs look up, at the freshly created tables.
+       (define-translation-hash-table 'utf-subst-table-for-decode
+         ucs-unicode-to-mule-cjk)
+       (define-translation-hash-table 'utf-subst-table-for-encode
+         ucs-mule-cjk-to-unicode))
+    ;; Mode turned off: install empty tables so that lookups in the
+    ;; CCL programs find no CJK substitutions.  These forms must be
+    ;; the else branch of the `if'; inside the `progn' they would
+    ;; discard the tables that were just registered.
+    (define-translation-hash-table 'utf-subst-table-for-decode
+      (make-hash-table :test 'eq))
+    (define-translation-hash-table 'utf-subst-table-for-encode
+      (make-hash-table :test 'eq))))
 
 (define-ccl-program ccl-decode-mule-utf-8
   ;;
@@ -203,18 +283,20 @@ option is not the default."
     ((r5 = ,(charset-id 'eight-bit-control))
      (r6 = ,(charset-id 'eight-bit-graphic))
      (loop
+      (r0 = -1)
       (read r0)
 
       ;; 1byte encoding, i.e., ascii
       (if (r0 < #x80)
-         (write r0)
+         ((write r0))
        (if (r0 < #xc0)             ; continuation byte (invalid here)
-           (if (r0 < #xa0)
-               (write-multibyte-character r5 r0)
-             (write-multibyte-character r6 r0))
+           ((if (r0 < #xa0)
+                (write-multibyte-character r5 r0)
+              (write-multibyte-character r6 r0)))
          ;; 2 byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
          (if (r0 < #xe0)
-             ((read r1)
+             ((r1 = -1)
+              (read r1)
 
               (if ((r1 & #b11000000) != #b10000000)
                   ;; Invalid 2-byte sequence
@@ -231,7 +313,6 @@ option is not the default."
                  (r2 = r1)
                  (r0 &= #x1f)
                  (r0 <<= 6)
-                 (r2 = r1)        ; save in case of overlong sequence
                  (r1 &= #x3f)
                  (r1 += r0)
                  ;; Now r1 holds scalar value
@@ -263,13 +344,15 @@ option is not the default."
                         (r1 %= 96)
                         (r1 += (r2 + 32))
                         (translate-character
-                         utf-8-translation-table-for-decode r0 r1)
+                         utf-translation-table-for-decode r0 r1)
                         (write-multibyte-character r0 r1))))))))
 
            ;; 3byte encoding
            ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
            (if (r0 < #xf0)
-               ((read r1 r2)
+               ((r1 = -1)
+                (r2 = -1)
+                (read r1 r2)
 
                 ;; This is set to 1 if the encoding is invalid.
                 (r4 = 0)
@@ -299,7 +382,7 @@ option is not the default."
                        (if (r2 < #xa0)
                            (write-multibyte-character r5 r2)
                          (write-multibyte-character r6 r2))))
-                
+
                   ;; mule-unicode-0100-24ff (>= 0800)
                   ((if (r3 < #x2500)
                        ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
@@ -308,28 +391,31 @@ option is not the default."
                         (r1 = (r7 + 32))
                         (r1 += ((r3 + 32) << 7))
                         (translate-character
-                         utf-8-translation-table-for-decode r0 r1)
+                         utf-translation-table-for-decode r0 r1)
                         (write-multibyte-character r0 r1))
-                   
+
                      ;; mule-unicode-2500-33ff
-                     ;; Fixme: Perhaps allow translation via
-                     ;; utf-8-subst-table for #x2e80 up, so that we use
-                     ;; consistent charsets for all of CJK.  Would need
-                     ;; corresponding change to encoding tables.
                      (if (r3 < #x3400)
-                         ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
-                          (r3 -= #x2500)
-                          (r3 //= 96)
-                          (r1 = (r7 + 32))
-                          (r1 += ((r3 + 32) << 7))
-                          (write-multibyte-character r0 r1))
+                         ((r4 = r3)    ; don't zap r3
+                          (lookup-integer utf-subst-table-for-decode r4 r5)
+                          (if r7
+                              ;; got a translation
+                              ((write-multibyte-character r4 r5)
+                               ;; Zapped through register starvation.
+                               (r5 = ,(charset-id 'eight-bit-control)))
+                            ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
+                             (r3 -= #x2500)
+                             (r3 //= 96)
+                             (r1 = (r7 + 32))
+                             (r1 += ((r3 + 32) << 7))
+                             (write-multibyte-character r0 r1))))
 
                        ;; U+3400 .. U+D7FF
                        ;; Try to convert to CJK chars, else keep
                        ;; them as eight-bit-{control|graphic}.
                        (if (r3 < #xd800)
                            ((r4 = r3)  ; don't zap r3
-                            (lookup-integer utf-8-subst-table r4 r5)
+                            (lookup-integer utf-subst-table-for-decode r4 r5)
                             (if r7
                                 ;; got a translation
                                 ((write-multibyte-character r4 r5)
@@ -357,7 +443,7 @@ option is not the default."
                                   (r3 = r5)
                                 (r3 = r6))
                               (write-multibyte-character r3 r2))
-                       
+
                            ;; mule-unicode-e000-ffff
                            ;; Fixme: fffe and ffff are invalid.
                            ((r0 = ,(charset-id 'mule-unicode-e000-ffff))
@@ -370,8 +456,11 @@ option is not the default."
              (if (r0 < #xfe)
                  ;; 4byte encoding
                  ;; keep those bytes as eight-bit-{control|graphic}
-                 ;; Fixme: allow lookup in utf-8-subst-table.
-                 ((read r1 r2 r3)
+                 ;; Fixme: allow lookup in utf-subst-table-for-decode.
+                 ((r1 = -1)
+                  (r2 = -1)
+                  (r3 = -1)
+                  (read r1 r2 r3)
                   ;; r0 > #xf0, thus eight-bit-graphic
                   (write-multibyte-character r6 r0)
                   (if (r1 < #xa0)
@@ -390,27 +479,55 @@ option is not the default."
                         (write-multibyte-character r5 r3))
                     (write-multibyte-character r6 r3))
                   (if (r0 >= #xf8)     ; 5- or 6-byte encoding
-                      ((read r1)
-                       (if (r1 < #xa0)
-                           (if (r1 < #x80) ; invalid byte
-                               (write r1)
-                             (write-multibyte-character r5 r1))
-                         (write-multibyte-character r6 r1))
+                      ((r0 = -1)
+                       (read r0)
+                       (if (r0 < #xa0)
+                           (if (r0 < #x80) ; invalid byte
+                               (write r0)
+                             (write-multibyte-character r5 r0))
+                         (write-multibyte-character r6 r0))
                        (if (r0 >= #xfc) ; 6-byte
-                           ((read r1)
-                            (if (r1 < #xa0)
-                                (if (r1 < #x80) ; invalid byte
-                                    (write r1)
-                                  (write-multibyte-character r5 r1))
-                              (write-multibyte-character r6 r1)))))))
+                           ((r0 = -1)
+                            (read r0)
+                            (if (r0 < #xa0)
+                                (if (r0 < #x80) ; invalid byte
+                                    (write r0)
+                                  (write-multibyte-character r5 r0))
+                              (write-multibyte-character r6 r0)))))))
                ;; else invalid byte >= #xfe
                (write-multibyte-character r6 r0))))))
-      (repeat))))
+      (repeat)))
+
+    ;; At EOF...
+    (if (r0 >= 0)
+       ((if (r0 < #x80)
+            (write r0)
+          (if (r0 < #xa0)
+              (write-multibyte-character r5 r0)
+            ((write-multibyte-character r6 r0))))
+        (if (r1 >= 0)
+            ((if (r1 < #x80)
+                 (write r1)
+               (if (r1 < #xa0)
+                   (write-multibyte-character r5 r1)
+                 ((write-multibyte-character r6 r1))))
+             (if (r2 >= 0)
+                 ((if (r2 < #x80)
+                      (write r2)
+                    (if (r2 < #xa0)
+                        (write-multibyte-character r5 r2)
+                      ((write-multibyte-character r6 r2))))
+                  (if (r3 >= 0)
+                      (if (r3 < #x80)
+                          (write r3)
+                        (if (r3 < #xa0)
+                            (write-multibyte-character r5 r3)
+                          ((write-multibyte-character r6 r3))))))))))))
 
   "CCL program to decode UTF-8.
 Basic decoding is done into the charsets ascii, latin-iso8859-1 and
-mule-unicode-*, but see also `utf-8-translation-table-for-decode' and
-`utf-8-subst-table'.
+mule-unicode-*, but see also `utf-fragmentation-table' and
+`ucs-unicode-to-mule-cjk'.
 Encodings of un-representable Unicode characters are decoded asis into
 eight-bit-control and eight-bit-graphic characters.")
 
@@ -421,7 +538,7 @@ eight-bit-control and eight-bit-graphic characters.")
       (if (r5 < 0)
          ((r1 = -1)
           (read-multibyte-character r0 r1)
-          (translate-character ucs-mule-to-mule-unicode r0 r1))
+          (translate-character utf-translation-table-for-encode r0 r1))
        (;; We have already done read-multibyte-character.
         (r0 = r5)
         (r1 = r6)
@@ -516,7 +633,7 @@ eight-bit-control and eight-bit-graphic characters.")
                                ((write #xc2)
                                 (write r1)))))))
 
-                   ((lookup-character utf-8-subst-rev-table r0 r1)
+                   ((lookup-character utf-subst-table-for-encode r0 r1)
                     (if r7             ; lookup succeeded
                         ((r1 = (((r0 & #xf000) >> 12) | #xe0))
                          (r2 = ((r0 & #x3f) | #x80))
@@ -538,10 +655,6 @@ eight-bit-control and eight-bit-graphic characters.")
 
   "CCL program to encode into UTF-8.")
 
-;; Dummy definition so that the CCL can be checked correctly; the
-;; actual data are loaded on demand.
-(unless (boundp 'ucs-mule-8859-to-mule-unicode)        ; don't zap it
-  (define-translation-table 'ucs-mule-8859-to-mule-unicode))
 
 (define-ccl-program ccl-untranslated-to-ucs
   `(0
@@ -648,7 +761,7 @@ Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
 ;; ucs-tables is preloaded
 ;; (defun utf-8-pre-write-conversion (beg end)
 ;;   "Semi-dummy pre-write function effectively to autoload ucs-tables."
-;;   ;; Ensure translation table is loaded.
+;;   ;; Ensure translation-table is loaded.
 ;;   (require 'ucs-tables)
 ;;   ;; Don't do this again.
 ;;   (coding-system-put 'mule-utf-8 'pre-write-conversion nil)
@@ -657,33 +770,21 @@ Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
 (make-coding-system
  'mule-utf-8 4 ?u
  "UTF-8 encoding for Emacs-supported Unicode characters.
-The supported Emacs character sets are the following, plus any other
-characters included in the tables `ucs-mule-to-mule-unicode' and
-`utf-8-subst-rev-table':
- ascii
- eight-bit-control
- eight-bit-graphic
- latin-iso8859-1
- latin-iso8859-2
- latin-iso8859-3
- latin-iso8859-4
- cyrillic-iso8859-5
- greek-iso8859-7
- hebrew-iso8859-8
- latin-iso8859-9
- latin-iso8859-14
- latin-iso8859-15
- mule-unicode-0100-24ff
- mule-unicode-2500-33ff
- mule-unicode-e000-ffff
-
-Unicode characters out of the ranges U+0000-U+33FF and U+E200-U+FFFF
-may be decoded into korean-ksc5601, chinese-gb2312, japanese-jisx0208
-\(see user option `utf-8-translate-cjk'); otherwise, sequences of
-eight-bit-control and eight-bit-graphic characters are used to
-preserve their byte sequences, and these are composed to display as a
-single character.  Emacs characters that otherwise can't be encoded
-are encoded as U+FFFD."
+It supports Unicode characters of these ranges:
+    U+0000..U+33FF, U+E000..U+FFFF.
+They correspond to these Emacs character sets:
+    ascii, latin-iso8859-1, mule-unicode-0100-24ff,
+    mule-unicode-2500-33ff, mule-unicode-e000-ffff
+
+On decoding (e.g. reading a file), Unicode characters not in the above
+ranges are decoded into sequences of eight-bit-control and
+eight-bit-graphic characters to preserve their byte sequences.  The
+byte sequence is preserved on i/o for valid utf-8, but not necessarily
+for invalid utf-8.
+
+On encoding (e.g. writing a file), Emacs characters not belonging to
+any of the character sets listed above are encoded into the UTF-8 byte
+sequence representing U+FFFD (REPLACEMENT CHARACTER)."
 
  '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)
  '((safe-charsets
@@ -691,24 +792,6 @@ are encoded as U+FFFD."
     eight-bit-control
     eight-bit-graphic
     latin-iso8859-1
-    latin-iso8859-15
-    latin-iso8859-14
-    latin-iso8859-9
-    hebrew-iso8859-8
-    greek-iso8859-7
-    cyrillic-iso8859-5
-    latin-iso8859-4
-    latin-iso8859-3
-    latin-iso8859-2
-    vietnamese-viscii-lower
-    vietnamese-viscii-upper
-    thai-tis620
-    ipa
-    ethiopic
-    indian-is13194
-    katakana-jisx0201
-    chinese-sisheng
-    lao
     mule-unicode-0100-24ff
     mule-unicode-2500-33ff
     mule-unicode-e000-ffff)
@@ -716,7 +799,11 @@ are encoded as U+FFFD."
    (coding-category . coding-category-utf-8)
    (valid-codes (0 . 255))
 ;;    (pre-write-conversion . utf-8-pre-write-conversion)
-   (post-read-conversion . utf-8-post-read-conversion)))
+   (post-read-conversion . utf-8-post-read-conversion)
+   ;; The CJK entry names the minor mode `utf-translate-cjk-mode'
+   ;; defined earlier in this file.
+   (dependency unify-8859-on-encoding-mode
+              unify-8859-on-decoding-mode
+              utf-fragment-on-decoding
+              utf-translate-cjk-mode)))
 
 (define-coding-system-alias 'utf-8 'mule-utf-8)