| 1 | ;;; utf-7.el --- utf-7 coding system |
| 2 | |
| 3 | ;; Copyright (C) 2003 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Dave Love <fx@gnu.org> |
| 6 | ;; Keywords: i18n, mail |
| 7 | |
| 8 | ;; This file is free software; you can redistribute it and/or modify |
| 9 | ;; it under the terms of the GNU General Public License as published by |
| 10 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 11 | ;; any later version. |
| 12 | |
| 13 | ;; This file is distributed in the hope that it will be useful, |
| 14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | ;; GNU General Public License for more details. |
| 17 | |
| 18 | ;; You should have received a copy of the GNU General Public License |
| 19 | ;; along with GNU Emacs; see the file COPYING. If not, write to |
| 20 | ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 21 | ;; Boston, MA 02110-1301, USA. |
| 22 | |
| 23 | ;;; Commentary: |
| 24 | |
| 25 | ;; Defines a coding system for UTF-7, defined in RFC 2152. Non-ASCII |
| 26 | ;; segments are encoded as base64-encoded big endian UTF-16. Also |
| 27 | ;; defines a variation required for IMAP (RFC 2060). |
| 28 | |
| 29 | ;; The encoding and decoding was originally taken from Jon K Hellan's |
| 30 | ;; implementation in Gnus, but has been substantially re-done. |
| 31 | |
| 32 | ;; This probably needs more attention. In particular, it's not |
| 33 | ;; completely consistent with iconv's behaviour. It's arguable |
| 34 | ;; whether the IMAP version should be a coding system since it's |
| 35 | ;; apparently only used for IMAP mailbox names, so it's commented out. |
| 36 | |
| 37 | ;;; Code: |
| 38 | |
| 39 | ;; (define-coding-system 'utf-7-imap |
| 40 | ;; "UTF-7 encoding of Unicode, IMAP version (RFC 2060)" |
| 41 | ;; :coding-type 'utf-8 |
| 42 | ;; :mnemonic ?u |
| 43 | ;; :charset-list '(unicode) |
| 44 | ;; :pre-write-conversion 'utf-7-imap-pre-write-conversion |
| 45 | ;; :post-read-conversion 'utf-7-imap-post-read-conversion) |
| 46 | |
(defun utf-7-decode (len imap)
  "Decode, in place, the LEN characters of UTF-7 text following point.
IMAP non-nil means use the IMAP variant, in which the shift
character is `&' rather than `+' and `,' replaces `/' in the
base64 alphabet.
Return the length of the decoded text.  The whole operation runs
inside `save-excursion' and `save-restriction'."
  (save-excursion
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((not-esc (if imap "^&" "^+"))
            (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/")))
        (while (not (eobp))
          ;; Advance to the next shift character, if there is one.
          (skip-chars-forward not-esc)
          (unless (eobp)
            (forward-char)
            (let ((p (point))
                  (run-length (skip-chars-forward skip-chars)))
              ;; A `-' directly after the run terminates it and is dropped.
              (if (eq ?- (char-after))
                  (delete-char 1))
              (unless (= run-length 0)  ; encoded lone esc-char
                ;; The encoder strips base64 padding, so restore it
                ;; before handing the run to `base64-decode-region'.
                (let ((pl (mod (- run-length) 4)))
                  (insert-char ?= pl)
                  (if imap
                      (subst-char-in-region p (point) ?, ?/))
                  (base64-decode-region p (point)))
                (decode-coding-region p (point) 'utf-16be)
                ;; Remove the shift character that introduced the run.
                ;; Use `delete-char', not the interactive command
                ;; `delete-backward-char', whose behavior depends on
                ;; the active region and on Overwrite mode.
                (save-excursion
                  (goto-char p)
                  (delete-char -1)))))))
      (- (point-max) (point-min)))))
| 74 | |
;;;###autoload
(defun utf-7-post-read-conversion (len)
  "Decode the LEN characters of standard UTF-7 text following point.
This calls `utf-7-decode' with the IMAP flag off and returns its value."
  (let ((imap-variant nil))
    (utf-7-decode len imap-variant)))
| 78 | |
| 79 | ;; (defun utf-7-imap-post-read-conversion (len) |
| 80 | ;; (utf-7-decode len t)) |
| 81 | |
(defun utf-7-encode (from to imap)
  "Encode the text between FROM and TO to UTF-7 in a new buffer.
FROM may instead be a string, in which case that string is
encoded and TO is ignored.  IMAP non-nil means use the IMAP
variant: the shift character is `&' rather than `+', `,' replaces
`/' in the base64 alphabet, and a shifted run is always closed
with `-'.
The new buffer holding the encoded text is left current and the
return value is nil."
  (let* ((old-buf (current-buffer))
         (esc (if imap ?& ?+))
         ;; These are characters which can be encoded asis.
         (skip-chars (if imap
                         "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060
                       ;; This includes the rfc2152 optional set.
                       ;; Perhaps it shouldn't (like iconv).
                       "\t\n\r -*,-[]-}"))
         (not-skip-chars (format "^%s%c" skip-chars esc)))
    (set-buffer (generate-new-buffer " *temp*"))
    (if (stringp from)
        (insert from)
      (insert-buffer-substring old-buf from to))
    (goto-char (point-min))
    (while (not (eobp))
      (skip-chars-forward skip-chars)
      ;; A literal shift character is written as the shift character
      ;; followed by `-'.  Compare against ESC, not a hard-coded `+':
      ;; in the IMAP variant the shift character is `&', and failing
      ;; to step over it here would make this loop spin forever.
      (if (eq esc (char-after))
          (progn (forward-char)
                 (insert ?-))
        (unless (eobp)
          (insert esc)
          (let ((p (point)))
            (skip-chars-forward not-skip-chars)
            (save-restriction
              ;; encode-coding-region doesn't preserve point
              (narrow-to-region p (point))
              (encode-coding-region p (point-max) 'utf-16be)
              (base64-encode-region p (point-max))
              (if imap
                  (subst-char-in-region p (point-max) ?/ ?,))
              (goto-char p)
              ;; As I read the RFC, this isn't correct, but it's
              ;; consistent with iconv, at least regarding `='.
              (skip-chars-forward "^= \t\n")
              (delete-region (point) (point-max))))
          ;; RFC 2060 requires every shifted run in a mailbox name to
          ;; be closed with `-', even at the very end of the name.
          ;; Plain UTF-7 may leave a run at end of text unterminated.
          (if (or imap (not (eobp)))
              (insert ?-)))))
    nil))
| 123 | |
;;;###autoload
(defun utf-7-pre-write-conversion (from to)
  "Encode the text between FROM and TO as standard UTF-7.
This calls `utf-7-encode' with the IMAP flag off and returns its value."
  (let ((imap-variant nil))
    (utf-7-encode from to imap-variant)))
| 127 | |
| 128 | ;; (defun utf-7-imap-pre-write-conversion (from to) |
| 129 | ;; (utf-7-encode from to t)) |
| 130 | |
| 131 | (provide 'utf-7) |
| 132 | |
| 133 | ;;; arch-tag: 975ee403-90a4-4286-97d2-4ed1323f4ef9 |
| 134 | ;;; utf-7.el ends here |