1 ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages
2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs.
8 ;; GNU Emacs is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; GNU Emacs is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 ;; Boston, MA 02111-1307, USA.
29 '(unless (fboundp 'base64-decode-string
)
37 (defvar rfc2047-header-encoding-alist
38 '(("Newsgroups" . nil
)
41 "*Header/encoding method alist.
42 The list is traversed sequentially. The keys can either be
43 header regexps or `t'.
47 1) nil, in which case no encoding is done;
48 2) `mime', in which case the header will be encoded according to RFC2047;
49 3) a charset, in which case it will be encoded as that charset;
50 4) `default', in which case the field will be encoded as the rest
53 (defvar rfc2047-charset-encoding-alist
72 "Alist of MIME charsets to RFC2047 encodings.
73 Valid encodings are nil, `Q' and `B'.")
75 (defvar rfc2047-encoding-function-alist
76 '((Q . rfc2047-q-encode-region
)
77 (B . rfc2047-b-encode-region
)
79 "Alist of RFC2047 encodings to encoding functions.")
81 (defvar rfc2047-q-encoding-alist
82 '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" .
"-A-Za-z0-9!*+/=_")
83 ("." .
"^\000-\007\011\013\015-\037\200-\377=_?"))
84 "Alist of header regexps and valid Q characters.")
87 ;;; Functions for encoding RFC2047 messages
90 (defun rfc2047-narrow-to-field ()
91 "Narrow the buffer to the header on the current line."
97 (if (re-search-forward "^[^ \n\t]" nil t
)
102 (goto-char (point-min)))
104 (defun rfc2047-encode-message-header ()
105 "Encode the message header according to `rfc2047-header-encoding-alist'.
106 Should be called narrowed to the head of the message."
109 (goto-char (point-min))
110 (let (alist elem method
)
113 (rfc2047-narrow-to-field)
114 (if (not (rfc2047-encodable-p))
115 (if (and (eq (mm-body-7-or-8) '8bit
)
118 (car message-posting-charset
)))
119 ;; 8 bit must be decoded.
120 ;; Is message-posting-charset a coding system?
121 (mm-encode-coding-region
122 (point-min) (point-max)
123 (car message-posting-charset
)))
124 ;; We found something that may perhaps be encoded.
126 alist rfc2047-header-encoding-alist
)
127 (while (setq elem
(pop alist
))
128 (when (or (and (stringp (car elem
))
129 (looking-at (car elem
)))
135 (rfc2047-encode-region (point-min) (point-max))
136 (rfc2047-fold-region (point-min) (point-max)))
137 ((eq method
'default
)
138 (if (and (featurep 'mule
)
140 (mm-encode-coding-region (point-min) (point-max)
141 mail-parse-charset
)))
142 ((mm-coding-system-p method
)
144 (mm-encode-coding-region (point-min) (point-max) method
)))
147 (goto-char (point-max)))))))
149 (defun rfc2047-encodable-p (&optional header
)
150 "Say whether the current (narrowed) buffer contains characters that need encoding in headers."
154 (mm-find-charset-region (point-min) (point-max))))
155 (cs (list 'us-ascii
(car message-posting-charset
)))
158 (unless (memq (pop charsets
) cs
)
162 (defun rfc2047-dissect-region (b e
)
163 "Dissect the region between B and E into words."
164 (let ((all-specials (concat ietf-drums-tspecials
" \t\n\r"))
165 (special-list (mapcar 'identity ietf-drums-tspecials
))
166 (blank-list '(? ?
\t ?
\n ?
\r))
167 words current cs state mail-parse-mule-charset
)
169 (narrow-to-region b e
)
170 (goto-char (point-min))
171 (skip-chars-forward all-specials
)
177 (if (not (eq (setq cs
(mm-charset-after)) 'ascii
))
182 ((memq (char-after) special-list
)
184 ((memq (char-after) blank-list
))
189 (if (not (eq (setq cs
(mm-charset-after)) 'ascii
))
190 (setq current cs
)))))
193 ((memq (char-after) special-list
)
195 (push (list b
(point) current
) words
)
197 ((memq (char-after) blank-list
)
201 (push (list b
(point) current
) words
)
204 ((or (eq (setq cs
(mm-charset-after)) 'ascii
)
209 (push (list b
(point) current
) words
)
214 (skip-chars-forward all-specials
)))
216 (push (list b
(point) current
) words
)))
219 (defun rfc2047-encode-region (b e
)
220 "Encode all encodable words in REGION."
221 (let ((words (rfc2047-dissect-region b e
))
222 beg end current word
)
223 (while (setq word
(pop words
))
224 (if (equal (nth 2 word
) current
)
225 (setq beg
(nth 0 word
))
227 (if (and (eq beg
(nth 1 word
)) (nth 2 word
))
229 ;; There might be a bug in Emacs Mule.
230 ;; A space must be inserted before encoding.
233 (rfc2047-encode (1+ beg
) (1+ end
) current
))
234 (rfc2047-encode beg end current
)))
235 (setq current
(nth 2 word
)
239 (rfc2047-encode beg end current
))))
241 (defun rfc2047-encode-string (string)
242 "Encode words in STRING."
245 (rfc2047-encode-region (point-min) (point-max))
248 (defun rfc2047-encode (b e charset
)
249 "Encode the word in the region with CHARSET."
250 (let* ((mime-charset (mm-mime-charset charset
))
251 (encoding (or (cdr (assq mime-charset
252 rfc2047-charset-encoding-alist
))
255 "=?" (downcase (symbol-name mime-charset
)) "?"
256 (downcase (symbol-name encoding
)) "?"))
259 (narrow-to-region b e
)
260 (when (eq encoding
'B
)
261 ;; break into lines before encoding
262 (goto-char (point-min))
264 (goto-char (min (point-max) (+ 15 (point))))
267 (if (and (mm-multibyte-p)
268 (mm-coding-system-p mime-charset
))
269 (mm-encode-coding-region (point-min) (point-max) mime-charset
))
270 (funcall (cdr (assq encoding rfc2047-encoding-function-alist
))
271 (point-min) (point-max))
272 (goto-char (point-min))
282 (defun rfc2047-fold-region (b e
)
283 "Fold the long lines in the region."
285 (narrow-to-region b e
)
286 (goto-char (point-min))
290 ((memq (char-after) '(? ?
\t))
291 (setq break
(point)))
294 (setq break
(point)))
297 (> (- (point) (save-excursion (beginning-of-line) (point))) 76))
302 (forward-char 1))))))
304 (defun rfc2047-b-encode-region (b e
)
305 "Encode the header contained in REGION with the B encoding."
307 (narrow-to-region (goto-char b
) e
)
309 (base64-encode-region (point) (progn (end-of-line) (point)) t
)
310 (if (and (bolp) (eolp))
311 (delete-backward-char 1))
314 (defun rfc2047-q-encode-region (b e
)
315 "Encode the header contained in REGION with the Q encoding."
318 (narrow-to-region (goto-char b
) e
)
319 (let ((alist rfc2047-q-encoding-alist
))
321 (when (looking-at (caar alist
))
322 (quoted-printable-encode-region b e nil
(cdar alist
))
323 (subst-char-in-region (point-min) (point-max) ? ?_
)
326 (goto-char (point-min))
328 (goto-char (min (point-max) (+ 64 (point))))
329 (search-backward "=" (- (point) 2) t
)
334 ;;; Functions for decoding RFC2047 messages
337 (defvar rfc2047-encoded-word-regexp
338 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=")
340 (defun rfc2047-decode-region (start end
)
341 "Decode MIME-encoded words in region between START and END."
343 (let ((case-fold-search t
)
347 (narrow-to-region start end
)
348 (goto-char (point-min))
349 ;; Remove whitespace between encoded words.
350 (while (re-search-forward
351 (concat "\\(" rfc2047-encoded-word-regexp
"\\)"
353 "\\(" rfc2047-encoded-word-regexp
"\\)")
355 (delete-region (goto-char (match-end 1)) (match-beginning 6)))
356 ;; Decode the encoded words.
357 (setq b
(goto-char (point-min)))
358 (while (re-search-forward rfc2047-encoded-word-regexp nil t
)
359 (setq e
(match-beginning 0))
360 (insert (rfc2047-parse-and-decode
363 (delete-region (match-beginning 0) (match-end 0)))))
364 (when (and (mm-multibyte-p)
366 (not (eq mail-parse-charset
'gnus-decoded
)))
367 (mm-decode-coding-region b e mail-parse-charset
))
369 (when (and (mm-multibyte-p)
371 (not (eq mail-parse-charset
'us-ascii
))
372 (not (eq mail-parse-charset
'gnus-decoded
)))
373 (mm-decode-coding-region b
(point-max) mail-parse-charset
))))))
375 (defun rfc2047-decode-string (string)
376 "Decode the quoted-printable-encoded STRING and return the results."
377 (let ((m (mm-multibyte-p)))
380 (mm-enable-multibyte))
383 (rfc2047-decode-region (point-min) (point-max)))
386 (defun rfc2047-parse-and-decode (word)
387 "Decode WORD and return it if it is an encoded word.
389 (if (not (string-match rfc2047-encoded-word-regexp word
))
394 (match-string 1 word
)
395 (upcase (match-string 2 word
))
396 (match-string 3 word
))
400 (defun rfc2047-decode (charset encoding string
)
401 "Decode STRING that uses CHARSET with ENCODING.
402 Valid ENCODINGs are \"B\" and \"Q\".
403 If your Emacs implementation can't decode CHARSET, it returns nil."
404 (if (stringp charset
)
405 (setq charset
(intern (downcase charset
))))
406 (if (or (not charset
)
407 (eq 'gnus-all mail-parse-ignored-charsets
)
408 (memq 'gnus-all mail-parse-ignored-charsets
)
409 (memq charset mail-parse-ignored-charsets
))
410 (setq charset mail-parse-charset
))
411 (let ((cs (mm-charset-to-coding-system charset
)))
412 (if (and (not cs
) charset
413 (listp mail-parse-ignored-charsets
)
414 (memq 'gnus-unknown mail-parse-ignored-charsets
))
415 (setq cs
(mm-charset-to-coding-system mail-parse-charset
)))
417 (when (and (eq cs
'ascii
)
419 (setq cs mail-parse-charset
))
420 (mm-decode-coding-string
422 ((equal "B" encoding
)
423 (base64-decode-string string
))
424 ((equal "Q" encoding
)
425 (quoted-printable-decode-string
426 (mm-replace-chars-in-string string ?_ ?
)))
427 (t (error "Invalid encoding: %s" encoding
)))
432 ;;; rfc2047.el ends here