Add 2012 to FSF copyright years for Emacs files (do not merge to trunk)
[bpt/emacs.git] / lisp / gnus / rfc2231.el
CommitLineData
23f87bed 1;;; rfc2231.el --- Functions for decoding rfc2231 headers
c113de23 2
61e1e4e8 3;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
49f70d46 4;; 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
c113de23
GM
5
6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7;; This file is part of GNU Emacs.
8
5e809f55 9;; GNU Emacs is free software: you can redistribute it and/or modify
c113de23 10;; it under the terms of the GNU General Public License as published by
5e809f55
GM
11;; the Free Software Foundation, either version 3 of the License, or
12;; (at your option) any later version.
c113de23
GM
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
5e809f55 16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
c113de23
GM
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
5e809f55 20;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
c113de23
GM
21
22;;; Commentary:
23
24;;; Code:
25
e017ba35 26(eval-when-compile (require 'cl))
c113de23 27(require 'ietf-drums)
23f87bed
MB
28(require 'rfc2047)
29(autoload 'mm-encode-body "mm-bodies")
30(autoload 'mail-header-remove-whitespace "mail-parse")
31(autoload 'mail-header-remove-comments "mail-parse")
c113de23
GM
32
33(defun rfc2231-get-value (ct attribute)
34 "Return the value of ATTRIBUTE from CT."
35 (cdr (assq attribute (cdr ct))))
36
23f87bed
MB
37(defun rfc2231-parse-qp-string (string)
38 "Parse QP-encoded string using `rfc2231-parse-string'.
39N.B. This is in violation with RFC2047, but it seem to be in common use."
40 (rfc2231-parse-string (rfc2047-decode-string string)))
41
c96ec15a 42(defun rfc2231-parse-string (string &optional signal-error)
c113de23
GM
43 "Parse STRING and return a list.
44The list will be on the form
c96ec15a
MB
45 `(name (attribute . value) (attribute . value)...)'.
46
47If the optional SIGNAL-ERROR is non-nil, signal an error when this
cf5a5c38
MB
48function fails in parsing of parameters. Otherwise, this function
49must never cause a Lisp error."
c113de23
GM
50 (with-temp-buffer
51 (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
52 (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
53 (ntoken (ietf-drums-token-to-list "0-9"))
01c52d31 54 c type attribute encoded number parameters value)
cf5a5c38
MB
55 (ietf-drums-init
56 (condition-case nil
57 (mail-header-remove-whitespace
58 (mail-header-remove-comments string))
59 ;; The most likely cause of an error is unbalanced parentheses
60 ;; or double-quotes. If all parentheses and double-quotes are
61 ;; quoted meaninglessly with backslashes, removing them might
62 ;; make it parseable. Let's try...
63 (error
64 (let (mod)
65 (when (and (string-match "\\\\\"" string)
66 (not (string-match "\\`\"\\|[^\\]\"" string)))
67 (setq string (mm-replace-in-string string "\\\\\"" "\"")
68 mod t))
69 (when (and (string-match "\\\\(" string)
70 (string-match "\\\\)" string)
71 (not (string-match "\\`(\\|[^\\][()]" string)))
72 (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1")
73 mod t))
74 (or (and mod
75 (ignore-errors
76 (mail-header-remove-whitespace
77 (mail-header-remove-comments string))))
78 ;; Finally, attempt to extract only type.
79 (if (string-match
80 (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+"
01c52d31
MB
81 "\\(?:/[^" ietf-drums-tspecials
82 "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)")
cf5a5c38
MB
83 string)
84 (match-string 1 string)
85 ""))))))
c113de23
GM
86 (let ((table (copy-syntax-table ietf-drums-syntax-table)))
87 (modify-syntax-entry ?\' "w" table)
23f87bed
MB
88 (modify-syntax-entry ?* " " table)
89 (modify-syntax-entry ?\; " " table)
90 (modify-syntax-entry ?= " " table)
c113de23
GM
91 ;; The following isn't valid, but one should be liberal
92 ;; in what one receives.
93 (modify-syntax-entry ?\: "w" table)
94 (set-syntax-table table))
95 (setq c (char-after))
96 (when (and (memq c ttoken)
cf5a5c38
MB
97 (not (memq c stoken))
98 (setq type (ignore-errors
99 (downcase
100 (buffer-substring (point) (progn
101 (forward-sexp 1)
102 (point)))))))
c113de23 103 ;; Do the params
c96ec15a
MB
104 (condition-case err
105 (progn
106 (while (not (eobp))
c113de23 107 (setq c (char-after))
c96ec15a
MB
108 (unless (eq c ?\;)
109 (error "Invalid header: %s" string))
110 (forward-char 1)
111 ;; If c in nil, then this is an invalid header, but
112 ;; since elm generates invalid headers on this form,
113 ;; we allow it.
114 (when (setq c (char-after))
115 (if (and (memq c ttoken)
116 (not (memq c stoken)))
117 (setq attribute
118 (intern
119 (downcase
120 (buffer-substring
121 (point) (progn (forward-sexp 1) (point))))))
122 (error "Invalid header: %s" string))
123 (setq c (char-after))
34128042
MB
124 (if (eq c ?*)
125 (progn
c96ec15a 126 (forward-char 1)
34128042
MB
127 (setq c (char-after))
128 (if (not (memq c ntoken))
129 (setq encoded t
130 number nil)
131 (setq number
132 (string-to-number
133 (buffer-substring
134 (point) (progn (forward-sexp 1) (point)))))
135 (setq c (char-after))
136 (when (eq c ?*)
137 (setq encoded t)
138 (forward-char 1)
139 (setq c (char-after)))))
140 (setq number nil
141 encoded nil))
c96ec15a
MB
142 (unless (eq c ?=)
143 (error "Invalid header: %s" string))
c113de23 144 (forward-char 1)
c96ec15a
MB
145 (setq c (char-after))
146 (cond
147 ((eq c ?\")
148 (setq value (buffer-substring (1+ (point))
149 (progn
150 (forward-sexp 1)
34128042
MB
151 (1- (point)))))
152 (when encoded
153 (setq value (mapconcat (lambda (c) (format "%%%02x" c))
154 value ""))))
c96ec15a
MB
155 ((and (or (memq c ttoken)
156 ;; EXTENSION: Support non-ascii chars.
157 (> c ?\177))
158 (not (memq c stoken)))
159 (setq value
160 (buffer-substring
58090a8d
MB
161 (point)
162 (progn
a367620f
MB
163 ;; Jump over asterisk, non-ASCII
164 ;; and non-boundary characters.
165 (while (and c
166 (or (eq c ?*)
167 (> c ?\177)
168 (not (eq (char-syntax c) ? ))))
58090a8d 169 (forward-char 1)
a367620f 170 (setq c (char-after)))
58090a8d 171 (point)))))
c96ec15a
MB
172 (t
173 (error "Invalid header: %s" string)))
01c52d31
MB
174 (push (list attribute value number encoded)
175 parameters))))
c96ec15a
MB
176 (error
177 (setq parameters nil)
01c52d31
MB
178 (when signal-error
179 (signal (car err) (cdr err)))))
180
181 ;; Now collect and concatenate continuation parameters.
182 (let ((cparams nil)
183 elem)
184 (loop for (attribute value part encoded)
185 in (sort parameters (lambda (e1 e2)
186 (< (or (caddr e1) 0)
187 (or (caddr e2) 0))))
188 do (if (or (not (setq elem (assq attribute cparams)))
189 (and (numberp part)
190 (zerop part)))
191 (push (list attribute value encoded) cparams)
192 (setcar (cdr elem) (concat (cadr elem) value))))
193 ;; Finally decode encoded values.
194 (cons type (mapcar
195 (lambda (elem)
196 (cons (car elem)
197 (if (nth 2 elem)
198 (rfc2231-decode-encoded-string (nth 1 elem))
199 (nth 1 elem))))
200 (nreverse cparams))))))))
c113de23
GM
201
202(defun rfc2231-decode-encoded-string (string)
203 "Decode an RFC2231-encoded string.
34128042
MB
204These look like:
205 \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
206 \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\",
207 \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
208 \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or
209 \"This is ***fun***\"."
01c52d31
MB
210 (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string)
211 (let ((coding-system (mm-charset-to-coding-system (match-string 1 string)))
212 ;;(language (match-string 2 string))
213 (value (match-string 3 string)))
e9e33c19 214 (mm-with-unibyte-buffer
34128042 215 (insert value)
c113de23 216 (goto-char (point-min))
656b4fa1 217 (while (re-search-forward "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" nil t)
c113de23
GM
218 (insert
219 (prog1
656b4fa1
MB
220 (string-to-number (match-string 1) 16)
221 (delete-region (match-beginning 0) (match-end 0)))))
34128042 222 ;; Decode using the charset, if any.
e9e33c19
KH
223 (if (memq coding-system '(nil ascii))
224 (buffer-string)
225 (mm-decode-coding-string (buffer-string) coding-system)))))
c113de23
GM
226
227(defun rfc2231-encode-string (param value)
c96ec15a
MB
228 "Return and PARAM=VALUE string encoded according to RFC2231.
229Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert
230the result of this function."
c113de23
GM
231 (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
232 (tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
233 (special (ietf-drums-token-to-list "*'%\n\t"))
234 (ascii (ietf-drums-token-to-list ietf-drums-text-token))
235 (num -1)
c96ec15a 236 ;; Don't make lines exceeding 76 column.
6203370b 237 (limit (- 74 (length param)))
c113de23 238 spacep encodep charsetp charset broken)
e9e33c19 239 (mm-with-multibyte-buffer
c113de23
GM
240 (insert value)
241 (goto-char (point-min))
242 (while (not (eobp))
243 (cond
244 ((or (memq (following-char) control)
245 (memq (following-char) tspecial)
246 (memq (following-char) special))
247 (setq encodep t))
248 ((eq (following-char) ? )
249 (setq spacep t))
250 ((not (memq (following-char) ascii))
251 (setq charsetp t)))
252 (forward-char 1))
253 (when charsetp
254 (setq charset (mm-encode-body)))
4f162824 255 (mm-disable-multibyte)
c113de23 256 (cond
6203370b
MB
257 ((or encodep charsetp
258 (progn
259 (end-of-line)
260 (> (current-column) (if spacep (- limit 2) limit))))
261 (setq limit (- limit 6))
c113de23 262 (goto-char (point-min))
6203370b 263 (insert (symbol-name (or charset 'us-ascii)) "''")
c113de23 264 (while (not (eobp))
c113de23
GM
265 (if (or (not (memq (following-char) ascii))
266 (memq (following-char) control)
267 (memq (following-char) tspecial)
268 (memq (following-char) special)
269 (eq (following-char) ? ))
270 (progn
6203370b
MB
271 (when (>= (current-column) (1- limit))
272 (insert ";\n")
273 (setq broken t))
c113de23
GM
274 (insert "%" (format "%02x" (following-char)))
275 (delete-char 1))
6203370b
MB
276 (when (> (current-column) limit)
277 (insert ";\n")
278 (setq broken t))
c113de23
GM
279 (forward-char 1)))
280 (goto-char (point-min))
c113de23
GM
281 (if (not broken)
282 (insert param "*=")
283 (while (not (eobp))
c96ec15a 284 (insert (if (>= num 0) " " "")
23f87bed 285 param "*" (format "%d" (incf num)) "*=")
c113de23
GM
286 (forward-line 1))))
287 (spacep
288 (goto-char (point-min))
34128042 289 (insert param "=\"")
c113de23
GM
290 (goto-char (point-max))
291 (insert "\""))
292 (t
293 (goto-char (point-min))
34128042 294 (insert param "=")))
c113de23
GM
295 (buffer-string))))
296
297(provide 'rfc2231)
298
cbee283d 299;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63
c113de23 300;;; rfc2231.el ends here