Commit | Line | Data |
---|---|---|
23f87bed | 1 | ;;; rfc2231.el --- Functions for decoding rfc2231 headers |
c113de23 | 2 | |
61e1e4e8 | 3 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, |
5df4f04c | 4 | ;; 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; This file is part of GNU Emacs. | |
8 | ||
5e809f55 | 9 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 10 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
11 | ;; the Free Software Foundation, either version 3 of the License, or |
12 | ;; (at your option) any later version. | |
c113de23 GM |
13 | |
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
17 | ;; GNU General Public License for more details. |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
21 | |
22 | ;;; Commentary: | |
23 | ||
24 | ;;; Code: | |
25 | ||
e017ba35 | 26 | (eval-when-compile (require 'cl)) |
c113de23 | 27 | (require 'ietf-drums) |
23f87bed MB |
28 | (require 'rfc2047) |
29 | (autoload 'mm-encode-body "mm-bodies") | |
30 | (autoload 'mail-header-remove-whitespace "mail-parse") | |
31 | (autoload 'mail-header-remove-comments "mail-parse") | |
c113de23 GM |
32 | |
33 | (defun rfc2231-get-value (ct attribute) | |
34 | "Return the value of ATTRIBUTE from CT." | |
35 | (cdr (assq attribute (cdr ct)))) | |
36 | ||
23f87bed MB |
37 | (defun rfc2231-parse-qp-string (string) |
38 | "Parse QP-encoded string using `rfc2231-parse-string'. | |
39 | N.B. This is in violation with RFC2047, but it seem to be in common use." | |
40 | (rfc2231-parse-string (rfc2047-decode-string string))) | |
41 | ||
c96ec15a | 42 | (defun rfc2231-parse-string (string &optional signal-error) |
c113de23 GM |
43 | "Parse STRING and return a list. |
44 | The list will be on the form | |
c96ec15a MB |
45 | `(name (attribute . value) (attribute . value)...)'. |
46 | ||
47 | If the optional SIGNAL-ERROR is non-nil, signal an error when this | |
cf5a5c38 MB |
48 | function fails in parsing of parameters. Otherwise, this function |
49 | must never cause a Lisp error." | |
c113de23 GM |
50 | (with-temp-buffer |
51 | (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token)) | |
52 | (stoken (ietf-drums-token-to-list ietf-drums-tspecials)) | |
53 | (ntoken (ietf-drums-token-to-list "0-9")) | |
01c52d31 | 54 | c type attribute encoded number parameters value) |
cf5a5c38 MB |
55 | (ietf-drums-init |
56 | (condition-case nil | |
57 | (mail-header-remove-whitespace | |
58 | (mail-header-remove-comments string)) | |
59 | ;; The most likely cause of an error is unbalanced parentheses | |
60 | ;; or double-quotes. If all parentheses and double-quotes are | |
61 | ;; quoted meaninglessly with backslashes, removing them might | |
62 | ;; make it parseable. Let's try... | |
63 | (error | |
64 | (let (mod) | |
65 | (when (and (string-match "\\\\\"" string) | |
66 | (not (string-match "\\`\"\\|[^\\]\"" string))) | |
67 | (setq string (mm-replace-in-string string "\\\\\"" "\"") | |
68 | mod t)) | |
69 | (when (and (string-match "\\\\(" string) | |
70 | (string-match "\\\\)" string) | |
71 | (not (string-match "\\`(\\|[^\\][()]" string))) | |
72 | (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1") | |
73 | mod t)) | |
74 | (or (and mod | |
75 | (ignore-errors | |
76 | (mail-header-remove-whitespace | |
77 | (mail-header-remove-comments string)))) | |
78 | ;; Finally, attempt to extract only type. | |
79 | (if (string-match | |
80 | (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+" | |
01c52d31 MB |
81 | "\\(?:/[^" ietf-drums-tspecials |
82 | "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)") | |
cf5a5c38 MB |
83 | string) |
84 | (match-string 1 string) | |
85 | "")))))) | |
c113de23 GM |
86 | (let ((table (copy-syntax-table ietf-drums-syntax-table))) |
87 | (modify-syntax-entry ?\' "w" table) | |
23f87bed MB |
88 | (modify-syntax-entry ?* " " table) |
89 | (modify-syntax-entry ?\; " " table) | |
90 | (modify-syntax-entry ?= " " table) | |
c113de23 GM |
91 | ;; The following isn't valid, but one should be liberal |
92 | ;; in what one receives. | |
93 | (modify-syntax-entry ?\: "w" table) | |
94 | (set-syntax-table table)) | |
95 | (setq c (char-after)) | |
96 | (when (and (memq c ttoken) | |
cf5a5c38 MB |
97 | (not (memq c stoken)) |
98 | (setq type (ignore-errors | |
99 | (downcase | |
100 | (buffer-substring (point) (progn | |
101 | (forward-sexp 1) | |
102 | (point))))))) | |
c113de23 | 103 | ;; Do the params |
c96ec15a MB |
104 | (condition-case err |
105 | (progn | |
106 | (while (not (eobp)) | |
c113de23 | 107 | (setq c (char-after)) |
c96ec15a MB |
108 | (unless (eq c ?\;) |
109 | (error "Invalid header: %s" string)) | |
110 | (forward-char 1) | |
111 | ;; If c in nil, then this is an invalid header, but | |
112 | ;; since elm generates invalid headers on this form, | |
113 | ;; we allow it. | |
114 | (when (setq c (char-after)) | |
115 | (if (and (memq c ttoken) | |
116 | (not (memq c stoken))) | |
117 | (setq attribute | |
118 | (intern | |
119 | (downcase | |
120 | (buffer-substring | |
121 | (point) (progn (forward-sexp 1) (point)))))) | |
122 | (error "Invalid header: %s" string)) | |
123 | (setq c (char-after)) | |
34128042 MB |
124 | (if (eq c ?*) |
125 | (progn | |
c96ec15a | 126 | (forward-char 1) |
34128042 MB |
127 | (setq c (char-after)) |
128 | (if (not (memq c ntoken)) | |
129 | (setq encoded t | |
130 | number nil) | |
131 | (setq number | |
132 | (string-to-number | |
133 | (buffer-substring | |
134 | (point) (progn (forward-sexp 1) (point))))) | |
135 | (setq c (char-after)) | |
136 | (when (eq c ?*) | |
137 | (setq encoded t) | |
138 | (forward-char 1) | |
139 | (setq c (char-after))))) | |
140 | (setq number nil | |
141 | encoded nil)) | |
c96ec15a MB |
142 | (unless (eq c ?=) |
143 | (error "Invalid header: %s" string)) | |
c113de23 | 144 | (forward-char 1) |
c96ec15a MB |
145 | (setq c (char-after)) |
146 | (cond | |
147 | ((eq c ?\") | |
148 | (setq value (buffer-substring (1+ (point)) | |
149 | (progn | |
150 | (forward-sexp 1) | |
34128042 MB |
151 | (1- (point))))) |
152 | (when encoded | |
153 | (setq value (mapconcat (lambda (c) (format "%%%02x" c)) | |
154 | value "")))) | |
c96ec15a MB |
155 | ((and (or (memq c ttoken) |
156 | ;; EXTENSION: Support non-ascii chars. | |
157 | (> c ?\177)) | |
158 | (not (memq c stoken))) | |
159 | (setq value | |
160 | (buffer-substring | |
58090a8d MB |
161 | (point) |
162 | (progn | |
a367620f MB |
163 | ;; Jump over asterisk, non-ASCII |
164 | ;; and non-boundary characters. | |
165 | (while (and c | |
166 | (or (eq c ?*) | |
167 | (> c ?\177) | |
168 | (not (eq (char-syntax c) ? )))) | |
58090a8d | 169 | (forward-char 1) |
a367620f | 170 | (setq c (char-after))) |
58090a8d | 171 | (point))))) |
c96ec15a MB |
172 | (t |
173 | (error "Invalid header: %s" string))) | |
01c52d31 MB |
174 | (push (list attribute value number encoded) |
175 | parameters)))) | |
c96ec15a MB |
176 | (error |
177 | (setq parameters nil) | |
01c52d31 MB |
178 | (when signal-error |
179 | (signal (car err) (cdr err))))) | |
180 | ||
181 | ;; Now collect and concatenate continuation parameters. | |
182 | (let ((cparams nil) | |
183 | elem) | |
184 | (loop for (attribute value part encoded) | |
185 | in (sort parameters (lambda (e1 e2) | |
186 | (< (or (caddr e1) 0) | |
187 | (or (caddr e2) 0)))) | |
188 | do (if (or (not (setq elem (assq attribute cparams))) | |
189 | (and (numberp part) | |
190 | (zerop part))) | |
191 | (push (list attribute value encoded) cparams) | |
192 | (setcar (cdr elem) (concat (cadr elem) value)))) | |
193 | ;; Finally decode encoded values. | |
194 | (cons type (mapcar | |
195 | (lambda (elem) | |
196 | (cons (car elem) | |
197 | (if (nth 2 elem) | |
198 | (rfc2231-decode-encoded-string (nth 1 elem)) | |
199 | (nth 1 elem)))) | |
200 | (nreverse cparams)))))))) | |
c113de23 GM |
201 | |
202 | (defun rfc2231-decode-encoded-string (string) | |
203 | "Decode an RFC2231-encoded string. | |
34128042 MB |
204 | These look like: |
205 | \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
206 | \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
207 | \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
208 | \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or | |
209 | \"This is ***fun***\"." | |
01c52d31 MB |
210 | (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string) |
211 | (let ((coding-system (mm-charset-to-coding-system (match-string 1 string))) | |
212 | ;;(language (match-string 2 string)) | |
213 | (value (match-string 3 string))) | |
e9e33c19 | 214 | (mm-with-unibyte-buffer |
34128042 | 215 | (insert value) |
c113de23 | 216 | (goto-char (point-min)) |
656b4fa1 | 217 | (while (re-search-forward "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" nil t) |
c113de23 GM |
218 | (insert |
219 | (prog1 | |
656b4fa1 MB |
220 | (string-to-number (match-string 1) 16) |
221 | (delete-region (match-beginning 0) (match-end 0))))) | |
34128042 | 222 | ;; Decode using the charset, if any. |
e9e33c19 KH |
223 | (if (memq coding-system '(nil ascii)) |
224 | (buffer-string) | |
225 | (mm-decode-coding-string (buffer-string) coding-system))))) | |
c113de23 GM |
226 | |
227 | (defun rfc2231-encode-string (param value) | |
c96ec15a MB |
228 | "Return and PARAM=VALUE string encoded according to RFC2231. |
229 | Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert | |
230 | the result of this function." | |
c113de23 GM |
231 | (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token)) |
232 | (tspecial (ietf-drums-token-to-list ietf-drums-tspecials)) | |
233 | (special (ietf-drums-token-to-list "*'%\n\t")) | |
234 | (ascii (ietf-drums-token-to-list ietf-drums-text-token)) | |
235 | (num -1) | |
c96ec15a | 236 | ;; Don't make lines exceeding 76 column. |
6203370b | 237 | (limit (- 74 (length param))) |
c113de23 | 238 | spacep encodep charsetp charset broken) |
e9e33c19 | 239 | (mm-with-multibyte-buffer |
c113de23 GM |
240 | (insert value) |
241 | (goto-char (point-min)) | |
242 | (while (not (eobp)) | |
243 | (cond | |
244 | ((or (memq (following-char) control) | |
245 | (memq (following-char) tspecial) | |
246 | (memq (following-char) special)) | |
247 | (setq encodep t)) | |
248 | ((eq (following-char) ? ) | |
249 | (setq spacep t)) | |
250 | ((not (memq (following-char) ascii)) | |
251 | (setq charsetp t))) | |
252 | (forward-char 1)) | |
253 | (when charsetp | |
254 | (setq charset (mm-encode-body))) | |
4f162824 | 255 | (mm-disable-multibyte) |
c113de23 | 256 | (cond |
6203370b MB |
257 | ((or encodep charsetp |
258 | (progn | |
259 | (end-of-line) | |
260 | (> (current-column) (if spacep (- limit 2) limit)))) | |
261 | (setq limit (- limit 6)) | |
c113de23 | 262 | (goto-char (point-min)) |
6203370b | 263 | (insert (symbol-name (or charset 'us-ascii)) "''") |
c113de23 | 264 | (while (not (eobp)) |
c113de23 GM |
265 | (if (or (not (memq (following-char) ascii)) |
266 | (memq (following-char) control) | |
267 | (memq (following-char) tspecial) | |
268 | (memq (following-char) special) | |
269 | (eq (following-char) ? )) | |
270 | (progn | |
6203370b MB |
271 | (when (>= (current-column) (1- limit)) |
272 | (insert ";\n") | |
273 | (setq broken t)) | |
c113de23 GM |
274 | (insert "%" (format "%02x" (following-char))) |
275 | (delete-char 1)) | |
6203370b MB |
276 | (when (> (current-column) limit) |
277 | (insert ";\n") | |
278 | (setq broken t)) | |
c113de23 GM |
279 | (forward-char 1))) |
280 | (goto-char (point-min)) | |
c113de23 GM |
281 | (if (not broken) |
282 | (insert param "*=") | |
283 | (while (not (eobp)) | |
c96ec15a | 284 | (insert (if (>= num 0) " " "") |
23f87bed | 285 | param "*" (format "%d" (incf num)) "*=") |
c113de23 GM |
286 | (forward-line 1)))) |
287 | (spacep | |
288 | (goto-char (point-min)) | |
34128042 | 289 | (insert param "=\"") |
c113de23 GM |
290 | (goto-char (point-max)) |
291 | (insert "\"")) | |
292 | (t | |
293 | (goto-char (point-min)) | |
34128042 | 294 | (insert param "="))) |
c113de23 GM |
295 | (buffer-string)))) |
296 | ||
297 | (provide 'rfc2231) | |
298 | ||
cbee283d | 299 | ;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63 |
c113de23 | 300 | ;;; rfc2231.el ends here |