1 ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages
2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs.
8 ;; GNU Emacs is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; GNU Emacs is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 ;; Boston, MA 02111-1307, USA.
34 (eval-when-compile (defvar message-posting-charset
))
36 (defvar rfc2047-header-encoding-alist
37 '(("Newsgroups" . nil
)
40 "*Header/encoding method alist.
41 The list is traversed sequentially. The keys can either be
42 header regexps or `t'.
46 1) nil, in which case no encoding is done;
47 2) `mime', in which case the header will be encoded according to RFC2047;
48 3) a charset, in which case it will be encoded as that charset;
49 4) `default', in which case the field will be encoded as the rest
52 (defvar rfc2047-charset-encoding-alist
71 "Alist of MIME charsets to RFC2047 encodings.
72 Valid encodings are nil, `Q' and `B'.")
74 (defvar rfc2047-encoding-function-alist
75 '((Q . rfc2047-q-encode-region
)
76 (B . rfc2047-b-encode-region
)
78 "Alist of RFC2047 encodings to encoding functions.")
80 (defvar rfc2047-q-encoding-alist
81 '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" .
"-A-Za-z0-9!*+/")
82 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
83 ;; Avoid using 8bit characters. Some versions of Emacs has bug!
84 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
85 ("." .
"\010\012\014\040-\074\076\100-\136\140-\177"))
86 "Alist of header regexps and valid Q characters.")
89 ;;; Functions for encoding RFC2047 messages
92 (defun rfc2047-narrow-to-field ()
93 "Narrow the buffer to the header on the current line."
99 (if (re-search-forward "^[^ \n\t]" nil t
)
104 (goto-char (point-min)))
106 (defun rfc2047-encode-message-header ()
107 "Encode the message header according to `rfc2047-header-encoding-alist'.
108 Should be called narrowed to the head of the message."
112 (goto-char (point-min))
113 (let (alist elem method
)
116 (rfc2047-narrow-to-field)
117 (if (not (rfc2047-encodable-p))
118 (if (and (eq (mm-body-7-or-8) '8bit
)
121 (car message-posting-charset
)))
122 ;; 8 bit must be decoded.
123 ;; Is message-posting-charset a coding system?
124 (mm-encode-coding-region
125 (point-min) (point-max)
126 (car message-posting-charset
)))
127 ;; We found something that may perhaps be encoded.
129 alist rfc2047-header-encoding-alist
)
130 (while (setq elem
(pop alist
))
131 (when (or (and (stringp (car elem
))
132 (looking-at (car elem
)))
138 (rfc2047-encode-region (point-min) (point-max))
139 (rfc2047-fold-region (point-min) (point-max)))
140 ((eq method
'default
)
141 (if (and (featurep 'mule
)
143 (mm-encode-coding-region (point-min) (point-max)
144 mail-parse-charset
)))
145 ((mm-coding-system-p method
)
147 (mm-encode-coding-region (point-min) (point-max) method
)))
150 (goto-char (point-max)))))))
152 (defun rfc2047-encodable-p (&optional header
)
153 "Say whether the current (narrowed) buffer contains characters that need encoding in headers."
158 (mm-find-charset-region (point-min) (point-max))))
159 (cs (list 'us-ascii
(car message-posting-charset
)))
162 (unless (memq (pop charsets
) cs
)
166 (defun rfc2047-dissect-region (b e
)
167 "Dissect the region between B and E into words."
168 (let ((all-specials (concat ietf-drums-tspecials
" \t\n\r"))
169 (special-list (mapcar 'identity ietf-drums-tspecials
))
170 (blank-list '(? ?
\t ?
\n ?
\r))
171 words current cs state mail-parse-mule-charset
)
173 (narrow-to-region b e
)
174 (goto-char (point-min))
175 (skip-chars-forward all-specials
)
181 (if (not (eq (setq cs
(mm-charset-after)) 'ascii
))
186 ((memq (char-after) special-list
)
188 ((memq (char-after) blank-list
))
193 (if (not (eq (setq cs
(mm-charset-after)) 'ascii
))
194 (setq current cs
)))))
197 ((memq (char-after) special-list
)
199 (push (list b
(point) current
) words
)
201 ((memq (char-after) blank-list
)
205 (push (list b
(point) current
) words
)
208 ((or (eq (setq cs
(mm-charset-after)) 'ascii
)
213 (push (list b
(point) current
) words
)
218 (skip-chars-forward all-specials
)))
220 (push (list b
(point) current
) words
)))
223 (defun rfc2047-encode-region (b e
)
224 "Encode all encodable words in REGION."
225 (let ((words (rfc2047-dissect-region b e
))
226 beg end current word
)
227 (while (setq word
(pop words
))
228 (if (equal (nth 2 word
) current
)
229 (setq beg
(nth 0 word
))
231 (if (and (eq beg
(nth 1 word
)) (nth 2 word
))
233 ;; There might be a bug in Emacs Mule.
234 ;; A space must be inserted before encoding.
237 (rfc2047-encode (1+ beg
) (1+ end
) current
))
238 (rfc2047-encode beg end current
)))
239 (setq current
(nth 2 word
)
243 (rfc2047-encode beg end current
))))
245 (defun rfc2047-encode-string (string)
246 "Encode words in STRING."
249 (rfc2047-encode-region (point-min) (point-max))
252 (defun rfc2047-encode (b e charset
)
253 "Encode the word in the region with CHARSET."
254 (let* ((mime-charset (mm-mime-charset charset
))
255 (encoding (or (cdr (assq mime-charset
256 rfc2047-charset-encoding-alist
))
259 "=?" (downcase (symbol-name mime-charset
)) "?"
260 (downcase (symbol-name encoding
)) "?"))
263 (narrow-to-region b e
)
264 (when (eq encoding
'B
)
265 ;; break into lines before encoding
266 (goto-char (point-min))
268 (goto-char (min (point-max) (+ 15 (point))))
271 (if (and (mm-multibyte-p)
272 (mm-coding-system-p mime-charset
))
273 (mm-encode-coding-region (point-min) (point-max) mime-charset
))
274 (funcall (cdr (assq encoding rfc2047-encoding-function-alist
))
275 (point-min) (point-max))
276 (goto-char (point-min))
286 (defun rfc2047-fold-region (b e
)
287 "Fold the long lines in the region."
289 (narrow-to-region b e
)
290 (goto-char (point-min))
294 ((memq (char-after) '(? ?
\t))
295 (setq break
(point)))
298 (setq break
(point)))
301 (> (- (point) (save-excursion (beginning-of-line) (point))) 76))
306 (forward-char 1))))))
308 (defun rfc2047-b-encode-region (b e
)
309 "Encode the header contained in REGION with the B encoding."
311 (narrow-to-region (goto-char b
) e
)
313 (base64-encode-region (point) (progn (end-of-line) (point)) t
)
314 (if (and (bolp) (eolp))
315 (delete-backward-char 1))
318 (defun rfc2047-q-encode-region (b e
)
319 "Encode the header contained in REGION with the Q encoding."
322 (narrow-to-region (goto-char b
) e
)
323 (let ((alist rfc2047-q-encoding-alist
))
325 (when (looking-at (caar alist
))
326 (quoted-printable-encode-region b e nil
(cdar alist
))
327 (subst-char-in-region (point-min) (point-max) ? ?_
)
330 (goto-char (point-min))
332 (goto-char (min (point-max) (+ 64 (point))))
333 (search-backward "=" (- (point) 2) t
)
338 ;;; Functions for decoding RFC2047 messages
341 (defvar rfc2047-encoded-word-regexp
342 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=")
344 (defun rfc2047-decode-region (start end
)
345 "Decode MIME-encoded words in region between START and END."
347 (let ((case-fold-search t
)
351 (narrow-to-region start end
)
352 (goto-char (point-min))
353 ;; Remove whitespace between encoded words.
354 (while (re-search-forward
355 (concat "\\(" rfc2047-encoded-word-regexp
"\\)"
357 "\\(" rfc2047-encoded-word-regexp
"\\)")
359 (delete-region (goto-char (match-end 1)) (match-beginning 6)))
360 ;; Decode the encoded words.
361 (setq b
(goto-char (point-min)))
362 (while (re-search-forward rfc2047-encoded-word-regexp nil t
)
363 (setq e
(match-beginning 0))
364 (insert (rfc2047-parse-and-decode
367 (delete-region (match-beginning 0) (match-end 0)))))
368 (when (and (mm-multibyte-p)
370 (not (eq mail-parse-charset
'gnus-decoded
)))
371 (mm-decode-coding-region b e mail-parse-charset
))
373 (when (and (mm-multibyte-p)
375 (not (eq mail-parse-charset
'us-ascii
))
376 (not (eq mail-parse-charset
'gnus-decoded
)))
377 (mm-decode-coding-region b
(point-max) mail-parse-charset
))))))
379 (defun rfc2047-decode-string (string)
380 "Decode the quoted-printable-encoded STRING and return the results."
381 (let ((m (mm-multibyte-p)))
384 (mm-enable-multibyte))
387 (rfc2047-decode-region (point-min) (point-max)))
390 (defun rfc2047-parse-and-decode (word)
391 "Decode WORD and return it if it is an encoded word.
393 (if (not (string-match rfc2047-encoded-word-regexp word
))
398 (match-string 1 word
)
399 (upcase (match-string 2 word
))
400 (match-string 3 word
))
404 (defun rfc2047-decode (charset encoding string
)
405 "Decode STRING that uses CHARSET with ENCODING.
406 Valid ENCODINGs are \"B\" and \"Q\".
407 If your Emacs implementation can't decode CHARSET, it returns nil."
408 (if (stringp charset
)
409 (setq charset
(intern (downcase charset
))))
410 (if (or (not charset
)
411 (eq 'gnus-all mail-parse-ignored-charsets
)
412 (memq 'gnus-all mail-parse-ignored-charsets
)
413 (memq charset mail-parse-ignored-charsets
))
414 (setq charset mail-parse-charset
))
415 (let ((cs (mm-charset-to-coding-system charset
)))
416 (if (and (not cs
) charset
417 (listp mail-parse-ignored-charsets
)
418 (memq 'gnus-unknown mail-parse-ignored-charsets
))
419 (setq cs
(mm-charset-to-coding-system mail-parse-charset
)))
421 (when (and (eq cs
'ascii
)
423 (setq cs mail-parse-charset
))
424 (mm-decode-coding-string
426 ((equal "B" encoding
)
427 (base64-decode-string string
))
428 ((equal "Q" encoding
)
429 (quoted-printable-decode-string
430 (mm-replace-chars-in-string string ?_ ?
)))
431 (t (error "Invalid encoding: %s" encoding
)))
436 ;;; rfc2047.el ends here