[bpt/emacs.git] / lisp / international / utf-16.el

;;; utf-16.el --- UTF-16 encoding/decoding

;; Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

;; Author: Dave Love <fx@gnu.org>
;; Keywords: Unicode, UTF-16, i18n

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; Support for UTF-16, which is a two-byte encoding (modulo
;; surrogates) of Unicode, defined in RFC 2781.  It is written either
;; in little or big endian order and either with or without the
;; leading BOM (a two-byte signature which identifies their byte sex).
;;
;; We provide these base coding systems.
;;	name					endian	BOM
;;	----					------	---
;;	mule-utf-16le				little	no
;;	mule-utf-16be				big	no
;;	mule-utf-16le-with-signature		little	yes
;;	mule-utf-16be-with-signature		big	yes
;;	mule-utf-16				both	yes
;;
;; Note that un-decodable sequences aren't (yet?) preserved as raw
;; bytes, as they are with utf-8, so reading and writing as utf-16 can
;; corrupt data.

;;; Code:

;; We end up with trivially different -le and -be versions of most
;; things below, sometimes with commonality abstracted into a let
;; binding for maintenance convenience.

;; Needed in macro expansion, so can't be let-bound.  Zapped after use.
(eval-and-compile
(defconst utf-16-decode-ucs
  ;; CCL fragment spliced into the decoders below.  It turns one
  ;; UTF-16 code unit (or a completed surrogate pair) into an Emacs
  ;; charset/code-point pair.
  ;;
  ;; If r5 is negative, r1 is a Unicode character code.  Otherwise, r5 is
  ;; the first of a surrogate pair and r1 is the second of the pair.
  ;; Output is charset ID in r0, code point in r1.  R0 may be set to
  ;; -1 in which case a caller should not write out r1.
  ;;
  ;; The fragment also writes r2 and r3 as scratch registers and tests
  ;; r7 after the table lookup.
  ;;
  ;; `ccl-mule-utf-untrans' is defined outside this file; every call
  ;; below passes the offending code in r0.  R5 is reset to -1 after
  ;; each call, presumably because the callee clobbers it -- TODO
  ;; confirm against its definition.
  `((if (r5 >= 0)
	;; A high surrogate is pending from the previous code unit.
	;; R0 is used here as a temporary flag: r1 is below the low
	;; surrogate range.
	((r0 = (r1 < #xDC00))
	 (if ((r1 >= #xE000) | r0)
	     ;; Invalid second code of surrogate pair.
	     ((r0 = r5)
	      (call ccl-mule-utf-untrans))
	   ;; Valid pair: combine both halves into one code >= #x10000.
	   ((r1 -= #xDC00)
	    (r1 += (((r5 - #xD800) << 10) + #x10000))))
	 (r5 = -1)))
    ;; From here on r1 is classified by range.  After an invalid pair
    ;; r1 still holds the second code unit and is decoded on its own.
    (if (r1 < 128)
	(r0 = ,(charset-id 'ascii))
      ((lookup-integer utf-subst-table-for-decode r1 r3)
       (if r7				; got a translation
	   ((r0 = r1) (r1 = r3))
	 (if (r1 < 160)
	     (r0 = ,(charset-id 'eight-bit-control))
	   (if (r1 < 256)
	       ((r0 = ,(charset-id 'latin-iso8859-1))
		(r1 -= 128))
	     (if (r1 < #x2500)
		 ;; For the three mule-unicode-* charsets the offset
		 ;; from the start of the range is split into
		 ;; (offset / 96) + 32 in the bits above bit 6 and
		 ;; (offset % 96) + 32 in the low 7 bits.
		 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
		  (r1 -= #x100)
		  (r2 = (((r1 / 96) + 32) << 7))
		  (r1 %= 96)
		  (r1 += (r2 + 32)))
	       (if (r1 < #x3400)
		   ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
		    (r1 -= #x2500)
		    (r2 = (((r1 / 96) + 32) << 7))
		    (r1 %= 96)
		    (r1 += (r2 + 32)))
		 (if (r1 < #xD800)
		     ;; We can't have this character.
		     ((r0 = r1)
		      (call ccl-mule-utf-untrans)
		      (r5 = -1)
		      (r0 = -1))
		   (if (r1 < #xDC00)
		       ;; The first code of a surrogate pair.
		       ;; Remember it in r5 and write nothing yet.
		       ((r5 = r1)
			(r0 = -1))
		     (if (r1 < #xE000)
			 ;; The second code of a surrogate pair, invalid.
			 ((r0 = r1)
			  (call ccl-mule-utf-untrans)
			  (r5 = -1)
			  (r0 = -1))
		       (if (r1 < #x10000)
			   ((r0 = ,(charset-id 'mule-unicode-e000-ffff))
			    (r1 -= #xE000)
			    (r2 = (((r1 / 96) + 32) << 7))
			    (r1 %= 96)
			    (r1 += (r2 + 32)))
			 ;; We can't have this character.
			 ((r0 = r1)
			  (call ccl-mule-utf-untrans)
			  (r5 = -1)
			  (r0 = -1)))))))))))))))

(defconst utf-16le-decode-loop
  ;; CCL statements shared by the little-endian decoders: read two
  ;; bytes per iteration, decode them with `utf-16-decode-ucs' and
  ;; write the resulting character.
  ;;
  ;; Start with no high surrogate pending (r5 = -1).
  `((r5 = -1)
    (loop
     ;; R3 stays -1 unless the first byte of this pair is actually
     ;; read; the EOF blocks of the decoders test it.
     (r3 = -1)
     (read r3 r4)
     ;; The low byte comes first: r1 = (r4 << 8) | r3.
     (r1 = (r4 <8 r3))
     ,@utf-16-decode-ucs
     ;; A negative r0 means there is nothing to write for this unit.
     (if (r0 >= 0)
	 ((translate-character utf-translation-table-for-decode r0 r1)
	  (write-multibyte-character r0 r1)))
     (repeat))))

(defconst utf-16be-decode-loop
  ;; CCL statements shared by the big-endian decoders: read two bytes
  ;; per iteration, decode them with `utf-16-decode-ucs' and write the
  ;; resulting character.
  ;;
  ;; Start with no high surrogate pending (r5 = -1).
  `((r5 = -1)
    (loop
     ;; R3 stays -1 unless the first byte of this pair is actually
     ;; read; the EOF blocks of the decoders test it.
     (r3 = -1)
     (read r3 r4)
     ;; The high byte comes first: r1 = (r3 << 8) | r4.
     (r1 = (r3 <8 r4))
     ,@utf-16-decode-ucs
     ;; A negative r0 means there is nothing to write for this unit.
     (if (r0 >= 0)
	 ((translate-character utf-translation-table-for-decode r0 r1)
	  (write-multibyte-character r0 r1)))
     (repeat))))

)

(define-ccl-program ccl-decode-mule-utf-16le
  `(2					; 2 bytes -> 1 to 4 bytes
    ;; Main block: the shared little-endian decode loop.
    ,utf-16le-decode-loop
    ;; EOF block.  First hand a high surrogate that never got its
    ;; second half to `ccl-mule-utf-untrans'.
    ((if (r5 >= 0)
	 ((r0 = r5)
	  (call ccl-mule-utf-untrans)))
     ;; R3 >= 0 means a single byte was read after the last complete
     ;; pair.  It is written directly as one eight-bit-control or
     ;; eight-bit-graphic character.
     ;; NOTE(review): `ccl-decode-mule-utf-16be' passes the same byte
     ;; to `ccl-mule-utf-untrans' instead -- confirm that the
     ;; difference is intended.
     (if (r3 < 0)
	 nil
       ((if (r3 < #xA0)
	    (r0 = ,(charset-id 'eight-bit-control))
	  (r0 = ,(charset-id 'eight-bit-graphic)))
	(write-multibyte-character r0 r3)))))
  "Decode UTF-16LE (little endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*.  Un-representable Unicode characters are decoded as
U+fffd.  The result is run through the translation-table named
`utf-translation-table-for-decode'.")

(define-ccl-program ccl-decode-mule-utf-16be
  `(2					; 2 bytes -> 1 to 4 bytes
    ;; Main block: the shared big-endian decode loop.
    ,utf-16be-decode-loop
    ;; EOF block.  First hand a high surrogate that never got its
    ;; second half to `ccl-mule-utf-untrans'.
    ((if (r5 >= 0)
	 ((r0 = r5)
	  (call ccl-mule-utf-untrans)))
     ;; R3 >= 0 means a single byte was read after the last complete
     ;; pair; it is passed to `ccl-mule-utf-untrans' as well.
     (if (r3 >= 0)
	 ((r0 = r3)
	  (call ccl-mule-utf-untrans)))))
  "Decode UTF-16BE (big endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*.  Un-representable Unicode characters are
decoded as U+fffd.  The result is run through the translation-table of
name `utf-translation-table-for-decode'.")

(define-ccl-program ccl-decode-mule-utf-16le-with-signature
  `(2
    ;; Main block.  R5 ("pending high surrogate", -1 = none) is set
    ;; before the first read because that read can already reach end
    ;; of input and jump to the EOF block, which now tests r5.
    ((r5 = -1)
     (r3 = -1)
     ;; Read and discard the 2-byte BOM.
     (read r3 r4)
     ,@utf-16le-decode-loop)
    ;; EOF block.  Flush a high surrogate that never got its second
    ;; half, as `ccl-decode-mule-utf-16le' does, instead of silently
    ;; dropping it.
    ((if (r5 >= 0)
	 ((r0 = r5)
	  (call ccl-mule-utf-untrans)))
     ;; R3 >= 0 means a single byte was read just before end of input.
     (if (r3 >= 0)
	 ((r0 = r3)
	  (call ccl-mule-utf-untrans)))))
  "Like `ccl-decode-mule-utf-16le' but skip the first 2-byte BOM.")

(define-ccl-program ccl-decode-mule-utf-16be-with-signature
  `(2
    ;; Main block.  R5 ("pending high surrogate", -1 = none) is set
    ;; before the first read because that read can already reach end
    ;; of input and jump to the EOF block, which now tests r5.
    ((r5 = -1)
     (r3 = -1)
     ;; Read and discard the 2-byte BOM.
     (read r3 r4)
     ,@utf-16be-decode-loop)
    ;; EOF block.  Flush a high surrogate that never got its second
    ;; half, as `ccl-decode-mule-utf-16be' does, instead of silently
    ;; dropping it.
    ((if (r5 >= 0)
	 ((r0 = r5)
	  (call ccl-mule-utf-untrans)))
     ;; R3 >= 0 means a single byte was read just before end of input.
     (if (r3 >= 0)
	 ((r0 = r3)
	  (call ccl-mule-utf-untrans)))))
  "Like `ccl-decode-mule-utf-16be' but skip the first 2-byte BOM.")

(define-ccl-program ccl-decode-mule-utf-16
  `(2
    ;; Main block.  R5 ("pending high surrogate", -1 = none) is set
    ;; before the first read because that read can already reach end
    ;; of input and jump to the EOF block, which tests r5.
    ((r5 = -1)
     (r3 = -1)
     (read r3 r4)
     ;; Interpret the first two bytes as big endian.
     (r1 = (r3 <8 r4))
     (if (r1 == #xFFFE)
	 ;; R1 is a BOM for little endian.  We keep this character as
	 ;; is temporarily.  It is removed by post-read-conversion
	 ;; function.
	 (,@utf-16-decode-ucs
	  (write-multibyte-character r0 r1)
	  ,@utf-16le-decode-loop)
       ((if (r1 == #xFEFF)
	    ;; R1 is a BOM for big endian, but we can't keep that
	    ;; character in the output because it can't be
	    ;; distinguished with the normal U+FEFF.  So, we keep
	    ;; #xFFFF instead.
	    ((r1 = #xFFFF)
	     ,@utf-16-decode-ucs
	     (write-multibyte-character r0 r1))
	  ;; R1 is a normal Unicode character.  If it is the first
	  ;; half of a surrogate pair, `utf-16-decode-ucs' leaves it
	  ;; in r5 for the next iteration.
	  (,@utf-16-decode-ucs
	   (if (r0 >= 0)
	       ((translate-character utf-translation-table-for-decode r0 r1)
		(write-multibyte-character r0 r1)))))
	;; Splice only the (loop ...) part of `utf-16be-decode-loop'.
	;; Its leading (r5 = -1) would throw away a high surrogate
	;; read as the very first code unit of a BOM-less stream; r5
	;; is already initialized above.
	,@(cdr utf-16be-decode-loop))))
    ;; EOF block.  Flush a high surrogate that never got its second
    ;; half, as the decoders without signature do.
    ((if (r5 >= 0)
	 ((r0 = r5)
	  (call ccl-mule-utf-untrans)))
     ;; R3 >= 0 means a single byte was read just before end of input.
     (if (r3 >= 0)
	 ((r0 = r3)
	  (call ccl-mule-utf-untrans)))))
  "Like `ccl-decode-mule-utf-16be' or `ccl-decode-mule-utf-16le', chosen by
checking the first BOM.")

;; The decode fragments are only needed while the `define-ccl-program'
;; forms that splice them are expanded, so unbind them again.
(dolist (fragment '(utf-16-decode-ucs
		    utf-16le-decode-loop
		    utf-16be-decode-loop))
  (makunbound fragment))

;; The UTF-16 decoder generates a UTF-8 sequence, represented by a
;; sequence of eight-bit-control/graphic chars, for an invalid byte (the
;; last byte of an odd length source) and for an untranslatable character
;; (including an invalid surrogate-pair code-point).
;;
;; This CCL parses that sequence (the first byte is already in r1),
;; and if the sequence represents an untranslatable character, it sets
;; r1 to the original invalid code or untranslated Unicode character
;; code, sets r2 to -1 (to prevent r2 and r3 from being written), and
;; sets r5 to -1 (to tell the caller that there's no pre-read character).
;;
;; If the sequence represents an invalid byte, it sets r1 to -1, r2 to
;; the byte, sets r3 and r5 to -1.
;;
;; Otherwise, don't change r1, set r2 and r3 to already read
;; eight-bit-control/graphic characters (if any), set r5 and r6 to the
;; last character that invalidates the UTF-8 form.
;;
;; Note: For UTF-8 validation, we only check whether a character is
;; eight-bit-control/graphic or not.  This may result in incorrect
;; handling of random binary data, but such data can't be encoded by
;; UTF-16 anyway.  At least, the UTF-16 decoder doesn't generate such a
;; sequence even if a source contains an invalid byte sequence.

(define-ccl-program ccl-mule-utf-16-encode-untrans
  ;; Subroutine of the UTF-16 encoders.  On entry r1 holds the first
  ;; eight-bit char of a possible UTF-8 sequence; the following chars
  ;; are read here into r5 (charset) and r6 (code).  Results are left
  ;; in r1, r2, r3, r5 and r6 for the caller; r0 is scratch.
  `(0
    ((r2 = -1)
     ;; Read the 2nd byte.
     (read-multibyte-character r5 r6)
     ;; The char must be eight-bit-control or eight-bit-graphic.
     (r0 = (r5 != ,(charset-id 'eight-bit-control)))
     (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
	 ;; Not a sequence: hand the first byte back in r2 and leave
	 ;; the char just read in r5/r6 for the caller.
	 ((r2 = r1)
	  (r3 = -1)
	  (r1 = -1)
	  (end)))			; invalid UTF-8

     (r3 = -1)
     (r2 = r6)
     ;; Lead bytes below #xE0 start a 2-byte sequence.  #xE0 itself is
     ;; the first 3-byte lead byte, so the comparison must be strict.
     (if (r1 < #xE0)
	 ;; 2-byte UTF-8, i.e. originally an invalid byte.
	 ((r2 &= #x3F)
	  (r2 |= ((r1 & #x1F) << 6))
	  (r1 = -1)
	  (r5 = -1)
	  (end)))

     ;; Read the 3rd byte.
     (read-multibyte-character r5 r6)
     (r0 = (r5 != ,(charset-id 'eight-bit-control)))
     (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
	 ((end)))			; invalid UTF-8

     (if (r1 < #xF0)		; valid 3-byte UTF-8
	 ((r1 = ((r1 & #x0F) << 12))
	  (r1 |= ((r2 & #x3F) << 6))
	  (r1 |= (r6 & #x3F))
	  (r2 = -1)
	  (r5 = -1)
	  (end)))

     (r3 = r6)
     ;; Read the 4th byte.
     (read-multibyte-character r5 r6)
     (r0 = (r5 != ,(charset-id 'eight-bit-control)))
     (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
	 (end))			    ; invalid UTF-8

     ;; valid 4-byte UTF-8
     (r1 = ((r1 & #x07) << 18))
     (r1 |= ((r2 & #x3F) << 12))
     (r1 |= ((r3 & #x3F) << 6))
     (r1 |= (r6 & #x3F))
     (r2 = -1)
     (r5 = -1)
     (end))

    ;; EOF block: input ended inside a sequence.  Write out whatever
    ;; has been collected so far.
    (if (r1 >= 0)
	((write r1)
	 (if (r2 >= 0)
	     ((write r2)
	      (if (r3 >= 0)
		  (write r3))))))))

(eval-and-compile
(defconst utf-16-decode-to-ucs
  ;; CCL fragment spliced into the encode loops below.
  ;;
  ;; Read a character and set r1 to the corresponding Unicode code.
  ;; If r5 is not negative, it means that we have already read a
  ;; character into r5 and r6.
  ;; If an invalid eight-bit-control/graphic sequence is found, r2 and
  ;; r3 may contain a byte to be written out, r5 and r6 may contain a
  ;; pre-read character.  Usually they are set to -1.
  `((if (r5 < 0)
	(read-multibyte-character r0 r1)
      ;; Consume the character pre-read by the previous iteration.
      ((r0 = r5)
       (r1 = r6)
       (r5 = -1)))
    (lookup-character utf-subst-table-for-encode r0 r1)
    (r2 = -1)
    ;; R7 > 0 signals a hit in `utf-subst-table-for-encode'; the code
    ;; is then taken from r0.
    (if (r7 > 0)
	(r1 = r0)
      ((translate-character utf-translation-table-for-encode r0 r1)
       (if (r0 == ,(charset-id 'ascii))
	   nil
	 (if (r0 == ,(charset-id 'latin-iso8859-1))
	     (r1 += 128)
	   (if (r0 == ,(charset-id 'eight-bit-control))
	       nil
	     (if (r0 == ,(charset-id 'eight-bit-graphic))
		 ;; Possibly the start of a sequence produced by the
		 ;; decoder for undecodable input; the subroutine
		 ;; fills r1, r2, r3, r5 and r6.
		 (call ccl-mule-utf-16-encode-untrans)
	       ;; Undo the 96x96 packing used by the mule-unicode-*
	       ;; charsets: r3 becomes the offset from the start of
	       ;; the charset's Unicode range.
	       ((r2 = ((r1 & #x7f) - 32))
		(r3 = ((((r1 >> 7) - 32) * 96) + r2))
		(r2 = -1)
		(r5 = -1)
		(if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
		    (r1 = (r3 + #x100))
		  (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
		      (r1 = (r3 + #x2500))
		    (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
			(r1 = (r3 + #xe000))
		      ;; Any other charset: U+FFFD.
		      (r1 = #xfffd)))))))))))))

(defconst utf-16le-encode-loop
  ;; CCL statements shared by the little-endian encoders: convert one
  ;; character per iteration with `utf-16-decode-to-ucs' and write it
  ;; low byte first.
  ;;
  ;; Start with no pre-read character (r5 = -1).
  `((r5 = -1)
    (loop
     ,@utf-16-decode-to-ucs
     (if (r1 >= #x10000)
	 ;; Write the first half of a surrogate pair now and leave the
	 ;; second half in r1 for the generic write below.
	 ((r1 -= #x10000)
	  (r0 = ((r1 >> 10) + #xD800))
	  (write (r0 & 255))
	  (write (r0 >> 8))
	  (r1 = ((r1 & #x3FF) + #xDC00))))
     ;; A negative r1 means there is no 16-bit unit to write.
     (if (r1 >= 0)
	 ((write (r1 & 255))
	  (write (r1 >> 8))))
     ;; R2 (and then r3) hold single bytes to be written as they are.
     (if (r2 >= 0)
	 ((write r2)
	  (if (r3 >= 0)
	      (write r3))))
     (repeat))))

(defconst utf-16be-encode-loop
  ;; CCL statements shared by the big-endian encoders: convert one
  ;; character per iteration with `utf-16-decode-to-ucs' and write it
  ;; high byte first.
  ;;
  ;; Start with no pre-read character (r5 = -1).
  `((r5 = -1)
    (loop
     ,@utf-16-decode-to-ucs
     (if (r1 >= #x10000)
	 ;; Write the first half of a surrogate pair now and leave the
	 ;; second half in r1 for the generic write below.
	 ((r1 -= #x10000)
	  (r0 = ((r1 >> 10) + #xD800))
	  (write (r0 >> 8))
	  (write (r0 & 255))
	  (r1 = ((r1 & #x3FF) + #xDC00))))
     ;; A negative r1 means there is no 16-bit unit to write.
     (if (r1 >= 0)
	 ((write (r1 >> 8))
	  (write (r1 & 255))))
     ;; R2 (and then r3) hold single bytes to be written as they are.
     (if (r2 >= 0)
	 ((write r2)
	  (if (r3 >= 0)
	      (write r3))))
     (repeat))))
)


(define-ccl-program ccl-encode-mule-utf-16le
  ;; Program list: 2, then the shared little-endian encode loop as
  ;; the main block.
  (list 2 utf-16le-encode-loop)
  "Encode to UTF-16LE (little endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table of name
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")

(define-ccl-program ccl-encode-mule-utf-16be
  ;; Program list: 2, then the shared big-endian encode loop as the
  ;; main block.
  (list 2 utf-16be-encode-loop)
  "Encode to UTF-16BE (big endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table named
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")

(define-ccl-program ccl-encode-mule-utf-16le-with-signature
  ;; Main block: the two signature bytes FF FE, followed by the
  ;; statements of the shared little-endian encode loop.
  (list 2
	(append '((write #xFF)
		  (write #xFE))
		utf-16le-encode-loop))
  "Encode to UTF-16 (little endian with signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table of name
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")

(define-ccl-program ccl-encode-mule-utf-16be-with-signature
  ;; Main block: the two signature bytes FE FF, followed by the
  ;; statements of the shared big-endian encode loop.
  (list 2
	(append '((write #xFE)
		  (write #xFF))
		utf-16be-encode-loop))
  "Encode to UTF-16 (big endian with signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table named
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")

;; The encode fragments are only needed while the `define-ccl-program'
;; forms that splice them are expanded, so unbind them again.
(dolist (fragment '(utf-16-decode-to-ucs
		    utf-16le-encode-loop
		    utf-16be-encode-loop))
  (makunbound fragment))

(defun mule-utf-16-post-read-conversion (length)
  "Post-read conversion function of the coding system `mule-utf-16'.
When LENGTH is positive, run `utf-8-post-read-conversion' on it and
then look at the character after point.  If that character is the
marker left by the decoder for a little endian BOM (U+FFFE) or for a
big endian BOM (U+FFFF), delete it and change the text conversion of
`last-coding-system-used' to the matching -with-signature coding
system.  Otherwise set `last-coding-system-used' to `mule-utf-16be'.
Return the resulting length, one less if a marker was deleted.
A non-positive LENGTH is returned unchanged."
  (if (<= length 0)
      length
    (setq length (utf-8-post-read-conversion length))
    (let* ((first-char (following-char))
	   ;; nil when the text does not start with a BOM marker.
	   (signed-variant
	    (cond ((= first-char (decode-char 'ucs #xFFFE))
		   'mule-utf-16le-with-signature)
		  ((= first-char (decode-char 'ucs #xFFFF))
		   'mule-utf-16be-with-signature))))
      (if (null signed-variant)
	  (setq last-coding-system-used 'mule-utf-16be)
	(delete-char 1)
	(setq last-coding-system-used
	      (coding-system-change-text-conversion
	       last-coding-system-used
	       signed-variant))
	(setq length (1- length))))
    length))

;; Define the five UTF-16 coding systems.  They share the tail of
;; their documentation and most of their properties; only the items
;; listed per entry in the table below differ.
(let ((charset-doc "

It supports Unicode characters of these ranges:
    U+0000..U+33FF, U+E000..U+FFFF.
They correspond to these Emacs character sets:
    ascii, latin-iso8859-1, mule-unicode-0100-24ff,
    mule-unicode-2500-33ff, mule-unicode-e000-ffff

On decoding (e.g. reading a file), Unicode characters not in the above
ranges are decoded as U+FFFD, effectively corrupting the data
if they are re-encoded.

On encoding (e.g. writing a file), Emacs characters not belonging to
any of the character sets listed above are encoded into the byte
sequence representing U+FFFD (REPLACEMENT CHARACTER).")
      (common-props
       `((safe-charsets
	  ascii
	  eight-bit-control
	  eight-bit-graphic
	  latin-iso8859-1
	  mule-unicode-0100-24ff
	  mule-unicode-2500-33ff
	  mule-unicode-e000-ffff
	  ,@(if utf-translate-cjk-mode
		utf-translate-cjk-charsets))
	 (valid-codes (0 . 255))
	 (mime-text-unsuitable . t)
	 (pre-write-conversion . utf-8-pre-write-conversion)
	 (dependency unify-8859-on-encoding-mode
		     unify-8859-on-decoding-mode
		     utf-fragment-on-decoding
		     utf-translate-cjk-mode))))
  ;; Each entry is (NAME SUMMARY (DECODER . ENCODER) EXTRA-PROPS).
  (dolist (entry
	   '((mule-utf-16le
	      "UTF-16LE encoding for Emacs-supported Unicode characters."
	      (ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le)
	      ((post-read-conversion . utf-8-post-read-conversion)
	       (mime-charset . utf-16le)))
	     (mule-utf-16be
	      "UTF-16BE encoding for Emacs-supported Unicode characters."
	      (ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be)
	      ((post-read-conversion . utf-8-post-read-conversion)
	       (mime-charset . utf-16be)))
	     (mule-utf-16le-with-signature
	      "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
	      (ccl-decode-mule-utf-16le-with-signature
	       . ccl-encode-mule-utf-16le-with-signature)
	      ((post-read-conversion . utf-8-post-read-conversion)
	       (coding-category . coding-category-utf-16-le)
	       (mime-charset . utf-16)))
	     (mule-utf-16be-with-signature
	      "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
	      (ccl-decode-mule-utf-16be-with-signature
	       . ccl-encode-mule-utf-16be-with-signature)
	      ((post-read-conversion . utf-8-post-read-conversion)
	       (coding-category . coding-category-utf-16-be)
	       (mime-charset . utf-16)))
	     (mule-utf-16
	      "UTF-16 (with or without BOM) for Emacs-supported Unicode characters."
	      (ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature)
	      ((post-read-conversion . mule-utf-16-post-read-conversion)
	       (coding-category . coding-category-utf-16-be)
	       (mime-charset . utf-16)))))
    (make-coding-system
     (nth 0 entry)
     4
     ;; Mule-UCS uses ?U, but code-pages uses that for koi8-u.
     ?u
     (concat (nth 1 entry) charset-doc)
     (nth 2 entry)
     (append common-props (nth 3 entry)))))

;; Aliases, defined in table order.  Each pair is (ALIAS . CODING-SYSTEM).
(dolist (pair '((utf-16le . mule-utf-16le)
		(utf-16be . mule-utf-16be)
		(utf-16le-with-signature . mule-utf-16le-with-signature)
		(utf-16be-with-signature . mule-utf-16be-with-signature)
		(utf-16 . mule-utf-16)
		;; For backward compatibility.
		(mule-utf-16-le . mule-utf-16le-with-signature)
		(utf-16-le . mule-utf-16le-with-signature)
		(mule-utf-16-be . mule-utf-16be-with-signature)
		(utf-16-be . mule-utf-16be-with-signature)))
  (define-coding-system-alias (car pair) (cdr pair)))

;;; arch-tag: 85455d46-d9c9-466d-a6f3-c3582a7367c4
;;; utf-16.el ends here
Commit	Line	Data
fc2938d1 DL	1	;;; utf-16.el --- UTF-16 encoding/decoding
fc2938d1 DL	2
9e24a165	3	;; Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
fc2938d1 DL	4
	5	;; Author: Dave Love <fx@gnu.org>
	6	;; Keywords: Unicode, UTF-16, i18n
	7
	8	;; This file is part of GNU Emacs.
	9
	10	;; GNU Emacs is free software; you can redistribute it and/or modify
	11	;; it under the terms of the GNU General Public License as published by
	12	;; the Free Software Foundation; either version 2, or (at your option)
	13	;; any later version.
	14
	15	;; GNU Emacs is distributed in the hope that it will be useful,
	16	;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	;; GNU General Public License for more details.
	19
	20	;; You should have received a copy of the GNU General Public License
	21	;; along with GNU Emacs; see the file COPYING. If not, write to the
	22	;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	23	;; Boston, MA 02111-1307, USA.
	24
	25	;;; Commentary:
	26
	27	;; Support for UTF-16, which is a two-byte encoding (modulo
cbcd4dc9 DL	28	;; surrogates) of Unicode, defined in RFC 2781. It is written either
	29	;; in little or big endian order and either with or without the
	30	;; leading BOM (a two-byte signature which identifies their byte sex).
65a0e5fe	31	;;
cbcd4dc9	32	;; We provide these base coding systems.
65a0e5fe KH	33	;; name endian BOM
	34	;; ---- ------ ---
	35	;; mule-utf-16le little no
	36	;; mule-utf-16be big no
	37	;; mule-utf-16le-with-signature little yes
	38	;; mule-utf-16be-with-signature big yes
	39	;; mule-utf-16 both yes
	40	;;
fc2938d1 DL	41	;; Note that un-decodable sequences aren't (yet?) preserved as raw
	42	;; bytes, as they are with utf-8, so reading and writing as utf-16 can
	43	;; corrupt data.
	44
	45	;;; Code:
	46
	47	;; We end up with trivially different -le and -be versions of most
	48	;; things below, sometimes with commonality abstracted into a let
	49	;; binding for maintenance convenience.
	50
fc2938d1 DL	51	;; Needed in macro expansion, so can't be let-bound. Zapped after use.
	52	(eval-and-compile
	53	(defconst utf-16-decode-ucs
95d2d433 KH	54	;; If r5 is negative, r1 is a Unicode chacter code. Otherise, r5 is
	55	;; the first of a surrogate pair and r1 is the second of the pair.
	56	;; Output is charset ID in r0, code point in r1. R0 may be set to
	57	;; -1 in which case a caller should not write out r1.
	58	`((if (r5 >= 0)
	59	((r0 = (r1 < #xDC00))
	60	(if ((r1 >= #xE000) \| r0)
	61	;; Invalid second code of surrogate pair.
	62	((r0 = r5)
	63	(call ccl-mule-utf-untrans))
	64	((r1 -= #xDC00)
	65	(r1 += (((r5 - #xD800) << 10) + #x10000))))
	66	(r5 = -1)))
	67	(if (r1 < 128)
	68	(r0 = ,(charset-id 'ascii))
	69	((lookup-integer utf-subst-table-for-decode r1 r3)
	70	(if r7 ; got a translation
	71	((r0 = r1) (r1 = r3))
	72	(if (r1 < 160)
	73	(r0 = ,(charset-id 'eight-bit-control))
	74	(if (r1 < 256)
	75	((r0 = ,(charset-id 'latin-iso8859-1))
	76	(r1 -= 128))
	77	(if (r1 < #x2500)
	78	((r0 = ,(charset-id 'mule-unicode-0100-24ff))
	79	(r1 -= #x100)
	80	(r2 = (((r1 / 96) + 32) << 7))
	81	(r1 %= 96)
	82	(r1 += (r2 + 32)))
	83	(if (r1 < #x3400)
	84	((r0 = ,(charset-id 'mule-unicode-2500-33ff))
	85	(r1 -= #x2500)
	86	(r2 = (((r1 / 96) + 32) << 7))
	87	(r1 %= 96)
	88	(r1 += (r2 + 32)))
	89	(if (r1 < #xD800)
	90	;; We can't have this character.
	91	((r0 = r1)
	92	(call ccl-mule-utf-untrans)
	93	(r5 = -1)
	94	(r0 = -1))
	95	(if (r1 < #xDC00)
	96	;; The first code of a surrogate pair.
	97	((r5 = r1)
	98	(r0 = -1))
	99	(if (r1 < #xE000)
	100	;; The second code of a surrogate pair, invalid.
	101	((r0 = r1)
	102	(call ccl-mule-utf-untrans)
	103	(r5 = -1)
	104	(r0 = -1))
	105	(if (r1 < #x10000)
	106	((r0 = ,(charset-id 'mule-unicode-e000-ffff))
	107	(r1 -= #xE000)
	108	(r2 = (((r1 / 96) + 32) << 7))
	109	(r1 %= 96)
	110	(r1 += (r2 + 32)))
	111	;; We can't have this character.
	112	((r0 = r1)
	113	(call ccl-mule-utf-untrans)
	114	(r5 = -1)
	115	(r0 = -1)))))))))))))))
4fbc4b17	116
65a0e5fe	117	(defconst utf-16le-decode-loop
95d2d433 KH	118	`((r5 = -1)
	119	(loop
	120	(r3 = -1)
	121	(read r3 r4)
	122	(r1 = (r4 <8 r3))
	123	,@utf-16-decode-ucs
	124	(if (r0 >= 0)
	125	((translate-character utf-translation-table-for-decode r0 r1)
	126	(write-multibyte-character r0 r1)))
	127	(repeat))))
4fbc4b17	128
65a0e5fe	129	(defconst utf-16be-decode-loop
95d2d433 KH	130	`((r5 = -1)
	131	(loop
	132	(r3 = -1)
	133	(read r3 r4)
	134	(r1 = (r3 <8 r4))
	135	,@utf-16-decode-ucs
	136	(if (r0 >= 0)
	137	((translate-character utf-translation-table-for-decode r0 r1)
	138	(write-multibyte-character r0 r1)))
	139	(repeat))))
4fbc4b17 KH	140
4fbc4b17 KH	141	)
fc2938d1	142
65a0e5fe	143	(define-ccl-program ccl-decode-mule-utf-16le
fc2938d1	144	`(2 ; 2 bytes -> 1 to 4 bytes
95d2d433 KH	145	,utf-16le-decode-loop
	146	((if (r5 >= 0)
	147	((r0 = r5)
	148	(call ccl-mule-utf-untrans)))
	149	(if (r3 < 0)
	150	nil
	151	((if (r3 < #xA0)
	152	(r0 = ,(charset-id 'eight-bit-control))
	153	(r0 = ,(charset-id 'eight-bit-graphic)))
	154	(write-multibyte-character r0 r3)))))
2217b8e1	155	"Decode UTF-16LE (little endian without signature bytes).
fc2938d1	156	Basic decoding is done into the charsets ascii, latin-iso8859-1 and
278ce936 KH	157	mule-unicode-*. Un-representable Unicode characters are decoded as
	158	U+fffd. The result is run through the translation-table named
	159	`utf-translation-table-for-decode'.")
fc2938d1	160
65a0e5fe	161	(define-ccl-program ccl-decode-mule-utf-16be
fc2938d1	162	`(2 ; 2 bytes -> 1 to 4 bytes
95d2d433 KH	163	,utf-16be-decode-loop
	164	((if (r5 >= 0)
	165	((r0 = r5)
	166	(call ccl-mule-utf-untrans)))
	167	(if (r3 >= 0)
	168	((r0 = r3)
	169	(call ccl-mule-utf-untrans)))))
2217b8e1	170	"Decode UTF-16BE (big endian without signature bytes).
fc2938d1 DL	171	Basic decoding is done into the charsets ascii, latin-iso8859-1 and
fc2938d1 DL	172	mule-unicode-*. Un-representable Unicode characters are
278ce936 KH	173	decoded as U+fffd. The result is run through the translation-table of
278ce936 KH	174	name `utf-translation-table-for-decode'.")
fc2938d1	175
65a0e5fe	176	(define-ccl-program ccl-decode-mule-utf-16le-with-signature
4fbc4b17	177	`(2
95d2d433 KH	178	((r3 = -1)
	179	(read r3 r4)
	180	,@utf-16le-decode-loop)
	181	(if (r3 >= 0)
	182	((r0 = r3)
	183	(call ccl-mule-utf-untrans))))
65a0e5fe	184	"Like ccl-decode-utf-16le but skip the first 2-byte BOM.")
4fbc4b17	185
65a0e5fe	186	(define-ccl-program ccl-decode-mule-utf-16be-with-signature
4fbc4b17	187	`(2
95d2d433 KH	188	((r3 = -1)
	189	(read r3 r4)
	190	,@utf-16be-decode-loop)
	191	(if (r3 >= 0)
	192	((r0 = r3)
	193	(call ccl-mule-utf-untrans))))
65a0e5fe	194	"Like ccl-decode-utf-16be but skip the first 2-byte BOM.")
4fbc4b17 KH	195
	196	(define-ccl-program ccl-decode-mule-utf-16
	197	`(2
95d2d433 KH	198	((r3 = -1)
95d2d433 KH	199	(read r3 r4)
4fbc4b17	200	(r1 = (r3 <8 r4))
95d2d433	201	(r5 = -1)
4fbc4b17 KH	202	(if (r1 == #xFFFE)
	203	;; R1 is a BOM for little endian. We keep this character as
	204	;; is temporarily. It is removed by post-read-conversion
	205	;; function.
	206	(,@utf-16-decode-ucs
	207	(write-multibyte-character r0 r1)
95d2d433	208	,@utf-16le-decode-loop)
4fbc4b17 KH	209	((if (r1 == #xFEFF)
	210	;; R1 is a BOM for big endian, but we can't keep that
	211	;; character in the output because it can't be
	212	;; distinguished with the normal U+FEFF. So, we keep
	213	;; #xFFFF instead.
	214	((r1 = #xFFFF)
95d2d433 KH	215	,@utf-16-decode-ucs
	216	(write-multibyte-character r0 r1))
	217	;; R1 is a normal Unicode character.
4fbc4b17	218	(,@utf-16-decode-ucs
95d2d433 KH	219	(if (r0 >= 0)
	220	((translate-character utf-translation-table-for-decode r0 r1)
	221	(write-multibyte-character r0 r1)))))
	222	,@utf-16be-decode-loop)))
	223	(if (r3 >= 0)
	224	((r0 = r3)
	225	(call ccl-mule-utf-untrans))))
65a0e5fe	226	"Like ccl-decode-utf-16be/le but check the first BOM.")
4fbc4b17	227
fc2938d1	228	(makunbound 'utf-16-decode-ucs) ; done with it
65a0e5fe KH	229	(makunbound 'utf-16le-decode-loop)
65a0e5fe KH	230	(makunbound 'utf-16be-decode-loop)
fc2938d1	231
95d2d433 KH	232	;; UTF-16 decoder generates an UTF-8 sequence represented by a
	233	;; sequence eight-bit-control/graphic chars for an invalid byte (the
	234	;; last byte of an odd length source) and an untranslatable character
	235	;; (including an invalid surrogate-pair code-point).
	236	;;
	237	;; This CCL parses that sequence (the first byte is already in r1),
	238	;; and if the sequence represents an untranslatable character, it sets
	239	;; r1 to the original invalid code or untranslated Unicode character
	240	;; code, sets r2 to -1 (to prevent r2 and r3 are written), set2 r5 to
	241	;; -1 (to tell the caller that there's no pre-read character).
	242	;;
	243	;; If the sequence represents an invalid byte, it sets r1 to -1, r2 to
	244	;; the byte, sets r3 and r5 to -1.
	245	;;
	246	;; Otherwise, don't change r1, set r2 and r3 to already read
	247	;; eight-bit-control/graphic characters (if any), set r5 and r6 to the
	248	;; last character that invalidates the UTF-8 form.
	249	;;
	250	;; Note: For UTF-8 validation, we only check if a character is
	251	;; eight-bit-control/graphic or not. It may result in incorrect
	252	;; handling of random binary data, but such a data can't be encoded by
	253	;; UTF-16 anyway. At least, UTF-16 decoder doesn't generate such a
	254	;; sequence even if a source contains invalid byte-sequence.
	255
	256	(define-ccl-program ccl-mule-utf-16-encode-untrans
	257	`(0
	258	((r2 = -1)
	259	;; Read the 2nd byte.
	260	(read-multibyte-character r5 r6)
	261	(r0 = (r5 != ,(charset-id 'eight-bit-control)))
	262	(if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
	263	((r2 = r1)
	264	(r3 = -1)
	265	(r1 = -1)
	266	(end))) ; invalid UTF-8
	267
	268	(r3 = -1)
	269	(r2 = r6)
	270	(if (r1 <= #xE0)
	271	;; 2-byte UTF-8, i.e. originally an invalid byte.
	272	((r2 &= #x3F)
	273	(r2 \|= ((r1 & #x1F) << 6))
	274	(r1 = -1)
	275	(r5 = -1)
	276	(end)))
	277
	278	;; Read the 3rd byte.
	279	(read-multibyte-character r5 r6)
	280	(r0 = (r5 != ,(charset-id 'eight-bit-control)))
	281	(if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
	282	((end))) ; invalid UTF-8
	283
	284	(if (r1 < #xF0) ; valid 3-byte UTF-8
	285	((r1 = ((r1 & #x0F) << 12))
	286	(r1 \|= ((r2 & #x3F) << 6))
	287	(r1 \|= (r6 & #x3F))
	288	(r2 = -1)
	289	(r5 = -1)
	290	(end)))
	291
	292	(r3 = r6)
	293	;; Read the 4th byte.
	294	(read-multibyte-character r5 r6)
	295	(r0 = (r5 != ,(charset-id 'eight-bit-control)))
296	(if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
297	(end)) ; livalid UTF-8
298
299	;; valid 4-byte UTF-8
300	(r1 = ((r1 & #x07) << 18))
301	(r1 \|= ((r2 & #x3F) << 12))
302	(r1 \|= ((r3 & #x3F) << 6))
303	(r1 \|= (r6 & #x3F))
304	(r2 = -1)
305	(r5 = -1)
306	(end))
307
308	(if (r1 >= 0)
309	((write r1)
310	(if (r2 >= 0)
311	((write r2)
312	(if (r3 >= 0)
313	(write r3))))))))
314
fc2938d1 DL	315	(eval-and-compile
fc2938d1 DL	316	(defconst utf-16-decode-to-ucs
95d2d433 KH	317	;; Read a character and set r1 to the corresponding Unicode code.
	318	;; If r5 is not negative, it means that we have already read a
	319	;; character into r5 and r6.
	320	;; If an invalid eight-bit-control/graphic sequence is found, r2 and
	321	;; r3 may contain a byte to written out, r5 and r6 may contain a
	322	;; pre-read character. Usually they are set to -1.
	323	`((if (r5 < 0)
	324	(read-multibyte-character r0 r1)
	325	((r0 = r5)
	326	(r1 = r6)
	327	(r5 = -1)))
	328	(lookup-character utf-subst-table-for-encode r0 r1)
	329	(r2 = -1)
	330	(if (r7 > 0)
	331	(r1 = r0)
	332	((translate-character utf-translation-table-for-encode r0 r1)
	333	(if (r0 == ,(charset-id 'ascii))
	334	nil
	335	(if (r0 == ,(charset-id 'latin-iso8859-1))
	336	(r1 += 128)
	337	(if (r0 == ,(charset-id 'eight-bit-control))
	338	nil
	339	(if (r0 == ,(charset-id 'eight-bit-graphic))
	340	(call ccl-mule-utf-16-encode-untrans)
	341	((r2 = ((r1 & #x7f) - 32))
	342	(r3 = ((((r1 >> 7) - 32) * 96) + r2))
	343	(r2 = -1)
	344	(r5 = -1)
	345	(if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
	346	(r1 = (r3 + #x100))
	347	(if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
	348	(r1 = (r3 + #x2500))
	349	(if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
	350	(r1 = (r3 + #xe000))
	351	(r1 = #xfffd)))))))))))))
4fbc4b17	352
65a0e5fe	353	(defconst utf-16le-encode-loop
95d2d433 KH	354	`((r5 = -1)
	355	(loop
	356	,@utf-16-decode-to-ucs
	357	(if (r1 >= #x10000)
	358	((r1 -= #x10000)
	359	(r0 = ((r1 >> 10) + #xD800))
	360	(write (r0 & 255))
	361	(write (r0 >> 8))
	362	(r1 = ((r1 & #x3FF) + #xDC00))))
	363	(if (r1 >= 0)
	364	((write (r1 & 255))
	365	(write (r1 >> 8))))
	366	(if (r2 >= 0)
	367	((write r2)
	368	(if (r3 >= 0)
	369	(write r3))))
	370	(repeat))))
4fbc4b17	371
65a0e5fe	372	(defconst utf-16be-encode-loop
95d2d433 KH	373	`((r5 = -1)
	374	(loop
	375	,@utf-16-decode-to-ucs
	376	(if (r1 >= #x10000)
	377	((r1 -= #x10000)
	378	(r0 = ((r1 >> 10) + #xD800))
	379	(write (r0 >> 8))
	380	(write (r0 & 255))
	381	(r1 = ((r1 & #x3FF) + #xDC00))))
	382	(if (r1 >= 0)
	383	((write (r1 >> 8))
	384	(write (r1 & 255))))
	385	(if (r2 >= 0)
	386	((write r2)
	387	(if (r3 >= 0)
	388	(write r3))))
	389	(repeat))))
4fbc4b17	390	)
fc2938d1	391
65a0e5fe KH	392
65a0e5fe KH	393	(define-ccl-program ccl-encode-mule-utf-16le
dbaba2d2	394	`(2
65a0e5fe	395	,utf-16le-encode-loop)
2217b8e1	396	"Encode to UTF-16LE (little endian without signature).
fc2938d1 DL	397	Characters from the charsets ascii, eight-bit-control,
fc2938d1 DL	398	eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
278ce936 KH	399	after translation through the translation-table of name
278ce936 KH	400	`utf-translation-table-for-encode'.
fc2938d1 DL	401	Others are encoded as U+FFFD.")
fc2938d1 DL	402
;; Big-endian encoder that emits no byte order mark.  The leading 2 is
;; the buffer magnification field of the CCL program; the main block
;; is `utf-16be-encode-loop' taken as is.
(define-ccl-program ccl-encode-mule-utf-16be
  (list 2 utf-16be-encode-loop)
  "Encode to UTF-16BE (big endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table named
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")
fc2938d1 DL	412
;; Little-endian encoder with a signature: the bytes FF FE are written
;; once, then the statements of `utf-16le-encode-loop' follow in the
;; same main block.
(define-ccl-program ccl-encode-mule-utf-16le-with-signature
  (list 2
	(append '((write #xFF)
		  (write #xFE))
		utf-16le-encode-loop))
  "Encode to UTF-16 (little endian with signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table of name
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")
	424
;; Big-endian encoder with a signature: the bytes FE FF are written
;; once, then the statements of `utf-16be-encode-loop' follow in the
;; same main block.
(define-ccl-program ccl-encode-mule-utf-16be-with-signature
  (list 2
	(append '((write #xFE)
		  (write #xFF))
		utf-16be-encode-loop))
  "Encode to UTF-16 (big endian with signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table named
`utf-translation-table-for-encode'.
Others are encoded as U+FFFD.")
	436
;; The CCL fragments were needed only while the `define-ccl-program'
;; forms above were being expanded; remove their bindings so that they
;; do not remain as global variables.
(mapc 'makunbound
      '(utf-16-decode-to-ucs
	utf-16le-encode-loop
	utf-16be-encode-loop))
(defun mule-utf-16-post-read-conversion (length)
  "Post-read-conversion function used by the `mule-utf-16' coding system.
LENGTH is the length of the text that was just decoded; the possibly
adjusted length is returned.  Nothing is done unless LENGTH is positive.

First `utf-8-post-read-conversion' is run and its return value becomes
the new length.  Then the character after point is examined.  If it is
the character for U+FFFE or for U+FFFF, that character is deleted, the
length is decremented, and `last-coding-system-used' is changed (with
`coding-system-change-text-conversion') to
`mule-utf-16le-with-signature' or `mule-utf-16be-with-signature'
respectively.  Otherwise `last-coding-system-used' is set to
`mule-utf-16be'."
  ;; NOTE(review): U+FFFE / U+FFFF are presumably the byte-order markers
  ;; left in the text by `ccl-decode-mule-utf-16' -- confirm against the
  ;; decoder.
  (when (> length 0)
    (setq length (utf-8-post-read-conversion length))
    (let* ((first-char (following-char))
	   (signed-system
	    (cond ((= first-char (decode-char 'ucs #xFFFE))
		   'mule-utf-16le-with-signature)
		  ((= first-char (decode-char 'ucs #xFFFF))
		   'mule-utf-16be-with-signature))))
      (if (null signed-system)
	  ;; No marker character at point.
	  (setq last-coding-system-used 'mule-utf-16be)
	;; Drop the marker and record which variant was detected.
	(delete-char 1)
	(setq last-coding-system-used
	      (coding-system-change-text-conversion
	       last-coding-system-used signed-system)
	      length (1- length)))))
  length)
fc2938d1	462
;; Define the five base UTF-16 coding systems.  They share the long
;; tail of their doc strings and most of their properties, so both are
;; built once and reused.  Each system is of type 4 (CCL based) with
;; mnemonic ?u and names its decoder/encoder CCL programs as a cons.
(let ((doc-tail "

It supports Unicode characters of these ranges:
U+0000..U+33FF, U+E000..U+FFFF.
They correspond to these Emacs character sets:
ascii, latin-iso8859-1, mule-unicode-0100-24ff,
mule-unicode-2500-33ff, mule-unicode-e000-ffff

On decoding (e.g. reading a file), Unicode characters not in the above
ranges are decoded as U+FFFD, effectively corrupting the data
if they are re-encoded.

On encoding (e.g. writing a file), Emacs characters not belonging to
any of the character sets listed above are encoded into the byte
sequence representing U+FFFD (REPLACEMENT CHARACTER).")
      (common-props
       (list
	;; The CJK charsets are added only when
	;; `utf-translate-cjk-mode' is on at the time this form runs.
	(append '(safe-charsets
		  ascii
		  eight-bit-control
		  eight-bit-graphic
		  latin-iso8859-1
		  mule-unicode-0100-24ff
		  mule-unicode-2500-33ff
		  mule-unicode-e000-ffff)
		(if utf-translate-cjk-mode
		    utf-translate-cjk-charsets))
	'(valid-codes (0 . 255))
	'(mime-text-unsuitable . t)
	'(pre-write-conversion . utf-8-pre-write-conversion)
	'(dependency unify-8859-on-encoding-mode
		     unify-8859-on-decoding-mode
		     utf-fragment-on-decoding
		     utf-translate-cjk-mode))))

  ;; Little endian, no signature.
  (make-coding-system
   'mule-utf-16le 4
   ?u	      ; Mule-UCS uses ?U, but code-pages uses that for koi8-u.
   (concat "UTF-16LE encoding for Emacs-supported Unicode characters."
	   doc-tail)
   '(ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le)
   (append common-props
	   '((post-read-conversion . utf-8-post-read-conversion)
	     (mime-charset . utf-16le))))

  ;; Big endian, no signature.
  (make-coding-system
   'mule-utf-16be 4 ?u
   (concat "UTF-16BE encoding for Emacs-supported Unicode characters."
	   doc-tail)
   '(ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be)
   (append common-props
	   '((post-read-conversion . utf-8-post-read-conversion)
	     (mime-charset . utf-16be))))

  ;; Little endian with signature.
  (make-coding-system
   'mule-utf-16le-with-signature 4 ?u
   (concat "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
	   doc-tail)
   '(ccl-decode-mule-utf-16le-with-signature
     . ccl-encode-mule-utf-16le-with-signature)
   (append common-props
	   '((post-read-conversion . utf-8-post-read-conversion)
	     (coding-category . coding-category-utf-16-le)
	     (mime-charset . utf-16))))

  ;; Big endian with signature.
  (make-coding-system
   'mule-utf-16be-with-signature 4 ?u
   (concat "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
	   doc-tail)
   '(ccl-decode-mule-utf-16be-with-signature
     . ccl-encode-mule-utf-16be-with-signature)
   (append common-props
	   '((post-read-conversion . utf-8-post-read-conversion)
	     (coding-category . coding-category-utf-16-be)
	     (mime-charset . utf-16))))

  ;; Generic UTF-16: its own decoder and post-read function, while
  ;; encoding goes through the big-endian encoder with signature.
  (make-coding-system
   'mule-utf-16 4 ?u
   (concat "UTF-16 (with or without BOM) for Emacs-supported Unicode characters."
	   doc-tail)
   '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature)
   (append common-props
	   '((post-read-conversion . mule-utf-16-post-read-conversion)
	     (coding-category . coding-category-utf-16-be)
	     (mime-charset . utf-16)))))
fc2938d1	555
;; Names without the `mule-' prefix for the base coding systems.
(dolist (entry '((utf-16le . mule-utf-16le)
		 (utf-16be . mule-utf-16be)
		 (utf-16le-with-signature . mule-utf-16le-with-signature)
		 (utf-16be-with-signature . mule-utf-16be-with-signature)
		 (utf-16 . mule-utf-16)))
  (define-coding-system-alias (car entry) (cdr entry)))

;; For backward compatibility.
(dolist (entry '((mule-utf-16-le . mule-utf-16le-with-signature)
		 (utf-16-le . mule-utf-16le-with-signature)
		 (mule-utf-16-be . mule-utf-16be-with-signature)
		 (utf-16-be . mule-utf-16be-with-signature)))
  (define-coding-system-alias (car entry) (cdr entry)))
	569
ab5796a9	570	;;; arch-tag: 85455d46-d9c9-466d-a6f3-c3582a7367c4
fc2938d1	571	;;; utf-16.el ends here