1 ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages
2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs.
8 ;; GNU Emacs is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; GNU Emacs is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 ;; Boston, MA 02111-1307, USA.
29 '(unless (fboundp 'base64-decode-string
)
37 (defvar rfc2047-header-encoding-alist
38 '(("Newsgroups" . nil
)
41 "*Header/encoding method alist.
42 The list is traversed sequentially. The keys can either be
43 header regexps or `t'.
47 1) nil, in which case no encoding is done;
48 2) `mime', in which case the header will be encoded according to RFC2047;
49 3) a charset, in which case it will be encoded as that charset;
50 4) `default', in which case the field will be encoded as the rest
53 (defvar rfc2047-charset-encoding-alist
72 "Alist of MIME charsets to RFC2047 encodings.
73 Valid encodings are nil, `Q' and `B'.")
75 (defvar rfc2047-encoding-function-alist
76 '((Q . rfc2047-q-encode-region
)
77 (B . rfc2047-b-encode-region
)
79 "Alist of RFC2047 encodings to encoding functions.")
81 (defvar rfc2047-q-encoding-alist
82 '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" .
"-A-Za-z0-9!*+/")
83 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
84 ;; Avoid using 8bit characters. Some versions of Emacs has bug!
85 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
86 ("." .
"\010\012\014\040-\074\076\100-\136\140-\177"))
87 "Alist of header regexps and valid Q characters.")
90 ;;; Functions for encoding RFC2047 messages
93 (defun rfc2047-narrow-to-field ()
94 "Narrow the buffer to the header on the current line."
100 (if (re-search-forward "^[^ \n\t]" nil t
)
105 (goto-char (point-min)))
107 (defun rfc2047-encode-message-header ()
108 "Encode the message header according to `rfc2047-header-encoding-alist'.
109 Should be called narrowed to the head of the message."
112 (goto-char (point-min))
113 (let (alist elem method
)
116 (rfc2047-narrow-to-field)
117 (if (not (rfc2047-encodable-p))
118 (if (and (eq (mm-body-7-or-8) '8bit
)
121 (car message-posting-charset
)))
122 ;; 8 bit must be decoded.
123 ;; Is message-posting-charset a coding system?
124 (mm-encode-coding-region
125 (point-min) (point-max)
126 (car message-posting-charset
)))
127 ;; We found something that may perhaps be encoded.
129 alist rfc2047-header-encoding-alist
)
130 (while (setq elem
(pop alist
))
131 (when (or (and (stringp (car elem
))
132 (looking-at (car elem
)))
138 (rfc2047-encode-region (point-min) (point-max))
139 (rfc2047-fold-region (point-min) (point-max)))
140 ((eq method
'default
)
141 (if (and (featurep 'mule
)
143 (mm-encode-coding-region (point-min) (point-max)
144 mail-parse-charset
)))
145 ((mm-coding-system-p method
)
147 (mm-encode-coding-region (point-min) (point-max) method
)))
150 (goto-char (point-max)))))))
152 (defun rfc2047-encodable-p (&optional header
)
153 "Say whether the current (narrowed) buffer contains characters that need encoding in headers."
157 (mm-find-charset-region (point-min) (point-max))))
158 (cs (list 'us-ascii
(car message-posting-charset
)))
161 (unless (memq (pop charsets
) cs
)
165 (defun rfc2047-dissect-region (b e
)
166 "Dissect the region between B and E into words."
167 (let ((all-specials (concat ietf-drums-tspecials
" \t\n\r"))
168 (special-list (mapcar 'identity ietf-drums-tspecials
))
169 (blank-list '(? ?
\t ?
\n ?
\r))
170 words current cs state mail-parse-mule-charset
)
172 (narrow-to-region b e
)
173 (goto-char (point-min))
174 (skip-chars-forward all-specials
)
180 (if (not (eq (setq cs
(mm-charset-after)) 'ascii
))
185 ((memq (char-after) special-list
)
187 ((memq (char-after) blank-list
))
192 (if (not (eq (setq cs
(mm-charset-after)) 'ascii
))
193 (setq current cs
)))))
196 ((memq (char-after) special-list
)
198 (push (list b
(point) current
) words
)
200 ((memq (char-after) blank-list
)
204 (push (list b
(point) current
) words
)
207 ((or (eq (setq cs
(mm-charset-after)) 'ascii
)
212 (push (list b
(point) current
) words
)
217 (skip-chars-forward all-specials
)))
219 (push (list b
(point) current
) words
)))
222 (defun rfc2047-encode-region (b e
)
223 "Encode all encodable words in REGION."
224 (let ((words (rfc2047-dissect-region b e
))
225 beg end current word
)
226 (while (setq word
(pop words
))
227 (if (equal (nth 2 word
) current
)
228 (setq beg
(nth 0 word
))
230 (if (and (eq beg
(nth 1 word
)) (nth 2 word
))
232 ;; There might be a bug in Emacs Mule.
233 ;; A space must be inserted before encoding.
236 (rfc2047-encode (1+ beg
) (1+ end
) current
))
237 (rfc2047-encode beg end current
)))
238 (setq current
(nth 2 word
)
242 (rfc2047-encode beg end current
))))
244 (defun rfc2047-encode-string (string)
245 "Encode words in STRING."
248 (rfc2047-encode-region (point-min) (point-max))
251 (defun rfc2047-encode (b e charset
)
252 "Encode the word in the region with CHARSET."
253 (let* ((mime-charset (mm-mime-charset charset
))
254 (encoding (or (cdr (assq mime-charset
255 rfc2047-charset-encoding-alist
))
258 "=?" (downcase (symbol-name mime-charset
)) "?"
259 (downcase (symbol-name encoding
)) "?"))
262 (narrow-to-region b e
)
263 (when (eq encoding
'B
)
264 ;; break into lines before encoding
265 (goto-char (point-min))
267 (goto-char (min (point-max) (+ 15 (point))))
270 (if (and (mm-multibyte-p)
271 (mm-coding-system-p mime-charset
))
272 (mm-encode-coding-region (point-min) (point-max) mime-charset
))
273 (funcall (cdr (assq encoding rfc2047-encoding-function-alist
))
274 (point-min) (point-max))
275 (goto-char (point-min))
285 (defun rfc2047-fold-region (b e
)
286 "Fold the long lines in the region."
288 (narrow-to-region b e
)
289 (goto-char (point-min))
293 ((memq (char-after) '(? ?
\t))
294 (setq break
(point)))
297 (setq break
(point)))
300 (> (- (point) (save-excursion (beginning-of-line) (point))) 76))
305 (forward-char 1))))))
307 (defun rfc2047-b-encode-region (b e
)
308 "Encode the header contained in REGION with the B encoding."
310 (narrow-to-region (goto-char b
) e
)
312 (base64-encode-region (point) (progn (end-of-line) (point)) t
)
313 (if (and (bolp) (eolp))
314 (delete-backward-char 1))
317 (defun rfc2047-q-encode-region (b e
)
318 "Encode the header contained in REGION with the Q encoding."
321 (narrow-to-region (goto-char b
) e
)
322 (let ((alist rfc2047-q-encoding-alist
))
324 (when (looking-at (caar alist
))
325 (quoted-printable-encode-region b e nil
(cdar alist
))
326 (subst-char-in-region (point-min) (point-max) ? ?_
)
329 (goto-char (point-min))
331 (goto-char (min (point-max) (+ 64 (point))))
332 (search-backward "=" (- (point) 2) t
)
337 ;;; Functions for decoding RFC2047 messages
340 (defvar rfc2047-encoded-word-regexp
341 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=")
343 (defun rfc2047-decode-region (start end
)
344 "Decode MIME-encoded words in region between START and END."
346 (let ((case-fold-search t
)
350 (narrow-to-region start end
)
351 (goto-char (point-min))
352 ;; Remove whitespace between encoded words.
353 (while (re-search-forward
354 (concat "\\(" rfc2047-encoded-word-regexp
"\\)"
356 "\\(" rfc2047-encoded-word-regexp
"\\)")
358 (delete-region (goto-char (match-end 1)) (match-beginning 6)))
359 ;; Decode the encoded words.
360 (setq b
(goto-char (point-min)))
361 (while (re-search-forward rfc2047-encoded-word-regexp nil t
)
362 (setq e
(match-beginning 0))
363 (insert (rfc2047-parse-and-decode
366 (delete-region (match-beginning 0) (match-end 0)))))
367 (when (and (mm-multibyte-p)
369 (not (eq mail-parse-charset
'gnus-decoded
)))
370 (mm-decode-coding-region b e mail-parse-charset
))
372 (when (and (mm-multibyte-p)
374 (not (eq mail-parse-charset
'us-ascii
))
375 (not (eq mail-parse-charset
'gnus-decoded
)))
376 (mm-decode-coding-region b
(point-max) mail-parse-charset
))))))
378 (defun rfc2047-decode-string (string)
379 "Decode the quoted-printable-encoded STRING and return the results."
380 (let ((m (mm-multibyte-p)))
383 (mm-enable-multibyte))
386 (rfc2047-decode-region (point-min) (point-max)))
389 (defun rfc2047-parse-and-decode (word)
390 "Decode WORD and return it if it is an encoded word.
392 (if (not (string-match rfc2047-encoded-word-regexp word
))
397 (match-string 1 word
)
398 (upcase (match-string 2 word
))
399 (match-string 3 word
))
403 (defun rfc2047-decode (charset encoding string
)
404 "Decode STRING that uses CHARSET with ENCODING.
405 Valid ENCODINGs are \"B\" and \"Q\".
406 If your Emacs implementation can't decode CHARSET, it returns nil."
407 (if (stringp charset
)
408 (setq charset
(intern (downcase charset
))))
409 (if (or (not charset
)
410 (eq 'gnus-all mail-parse-ignored-charsets
)
411 (memq 'gnus-all mail-parse-ignored-charsets
)
412 (memq charset mail-parse-ignored-charsets
))
413 (setq charset mail-parse-charset
))
414 (let ((cs (mm-charset-to-coding-system charset
)))
415 (if (and (not cs
) charset
416 (listp mail-parse-ignored-charsets
)
417 (memq 'gnus-unknown mail-parse-ignored-charsets
))
418 (setq cs
(mm-charset-to-coding-system mail-parse-charset
)))
420 (when (and (eq cs
'ascii
)
422 (setq cs mail-parse-charset
))
423 (mm-decode-coding-string
425 ((equal "B" encoding
)
426 (base64-decode-string string
))
427 ((equal "Q" encoding
)
428 (quoted-printable-decode-string
429 (mm-replace-chars-in-string string ?_ ?
)))
430 (t (error "Invalid encoding: %s" encoding
)))
435 ;;; rfc2047.el ends here