Commit | Line | Data |
---|---|---|
23f87bed | 1 | ;;; rfc2231.el --- Functions for decoding rfc2231 headers |
c113de23 | 2 | |
ba318903 | 3 | ;; Copyright (C) 1998-2014 Free Software Foundation, Inc. |
c113de23 GM |
4 | |
5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
6 | ;; This file is part of GNU Emacs. | |
7 | ||
5e809f55 | 8 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 9 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
10 | ;; the Free Software Foundation, either version 3 of the License, or |
11 | ;; (at your option) any later version. | |
c113de23 GM |
12 | |
13 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
16 | ;; GNU General Public License for more details. |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 19 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
20 | |
21 | ;;; Commentary: | |
22 | ||
23 | ;;; Code: | |
24 | ||
e017ba35 | 25 | (eval-when-compile (require 'cl)) |
c113de23 | 26 | (require 'ietf-drums) |
23f87bed MB |
27 | (require 'rfc2047) |
28 | (autoload 'mm-encode-body "mm-bodies") | |
29 | (autoload 'mail-header-remove-whitespace "mail-parse") | |
30 | (autoload 'mail-header-remove-comments "mail-parse") | |
c113de23 GM |
31 | |
32 | (defun rfc2231-get-value (ct attribute) | |
33 | "Return the value of ATTRIBUTE from CT." | |
34 | (cdr (assq attribute (cdr ct)))) | |
35 | ||
23f87bed MB |
36 | (defun rfc2231-parse-qp-string (string) |
37 | "Parse QP-encoded string using `rfc2231-parse-string'. | |
38 | N.B. This is in violation with RFC2047, but it seem to be in common use." | |
39 | (rfc2231-parse-string (rfc2047-decode-string string))) | |
40 | ||
c96ec15a | 41 | (defun rfc2231-parse-string (string &optional signal-error) |
c113de23 GM |
42 | "Parse STRING and return a list. |
43 | The list will be on the form | |
c96ec15a MB |
44 | `(name (attribute . value) (attribute . value)...)'. |
45 | ||
46 | If the optional SIGNAL-ERROR is non-nil, signal an error when this | |
cf5a5c38 MB |
47 | function fails in parsing of parameters. Otherwise, this function |
48 | must never cause a Lisp error." | |
c113de23 GM |
49 | (with-temp-buffer |
50 | (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token)) | |
51 | (stoken (ietf-drums-token-to-list ietf-drums-tspecials)) | |
52 | (ntoken (ietf-drums-token-to-list "0-9")) | |
01c52d31 | 53 | c type attribute encoded number parameters value) |
cf5a5c38 MB |
54 | (ietf-drums-init |
55 | (condition-case nil | |
56 | (mail-header-remove-whitespace | |
57 | (mail-header-remove-comments string)) | |
58 | ;; The most likely cause of an error is unbalanced parentheses | |
59 | ;; or double-quotes. If all parentheses and double-quotes are | |
60 | ;; quoted meaninglessly with backslashes, removing them might | |
fac916bf | 61 | ;; make it parsable. Let's try... |
cf5a5c38 MB |
62 | (error |
63 | (let (mod) | |
64 | (when (and (string-match "\\\\\"" string) | |
65 | (not (string-match "\\`\"\\|[^\\]\"" string))) | |
66 | (setq string (mm-replace-in-string string "\\\\\"" "\"") | |
67 | mod t)) | |
68 | (when (and (string-match "\\\\(" string) | |
69 | (string-match "\\\\)" string) | |
70 | (not (string-match "\\`(\\|[^\\][()]" string))) | |
71 | (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1") | |
72 | mod t)) | |
73 | (or (and mod | |
74 | (ignore-errors | |
75 | (mail-header-remove-whitespace | |
76 | (mail-header-remove-comments string)))) | |
77 | ;; Finally, attempt to extract only type. | |
78 | (if (string-match | |
79 | (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+" | |
01c52d31 MB |
80 | "\\(?:/[^" ietf-drums-tspecials |
81 | "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)") | |
cf5a5c38 MB |
82 | string) |
83 | (match-string 1 string) | |
84 | "")))))) | |
c113de23 GM |
85 | (let ((table (copy-syntax-table ietf-drums-syntax-table))) |
86 | (modify-syntax-entry ?\' "w" table) | |
23f87bed MB |
87 | (modify-syntax-entry ?* " " table) |
88 | (modify-syntax-entry ?\; " " table) | |
89 | (modify-syntax-entry ?= " " table) | |
c113de23 GM |
90 | ;; The following isn't valid, but one should be liberal |
91 | ;; in what one receives. | |
92 | (modify-syntax-entry ?\: "w" table) | |
93 | (set-syntax-table table)) | |
94 | (setq c (char-after)) | |
95 | (when (and (memq c ttoken) | |
cf5a5c38 MB |
96 | (not (memq c stoken)) |
97 | (setq type (ignore-errors | |
98 | (downcase | |
99 | (buffer-substring (point) (progn | |
100 | (forward-sexp 1) | |
101 | (point))))))) | |
c113de23 | 102 | ;; Do the params |
c96ec15a MB |
103 | (condition-case err |
104 | (progn | |
105 | (while (not (eobp)) | |
c113de23 | 106 | (setq c (char-after)) |
c96ec15a MB |
107 | (unless (eq c ?\;) |
108 | (error "Invalid header: %s" string)) | |
109 | (forward-char 1) | |
110 | ;; If c in nil, then this is an invalid header, but | |
111 | ;; since elm generates invalid headers on this form, | |
112 | ;; we allow it. | |
113 | (when (setq c (char-after)) | |
114 | (if (and (memq c ttoken) | |
115 | (not (memq c stoken))) | |
116 | (setq attribute | |
117 | (intern | |
118 | (downcase | |
119 | (buffer-substring | |
120 | (point) (progn (forward-sexp 1) (point)))))) | |
121 | (error "Invalid header: %s" string)) | |
122 | (setq c (char-after)) | |
34128042 MB |
123 | (if (eq c ?*) |
124 | (progn | |
c96ec15a | 125 | (forward-char 1) |
34128042 MB |
126 | (setq c (char-after)) |
127 | (if (not (memq c ntoken)) | |
128 | (setq encoded t | |
129 | number nil) | |
130 | (setq number | |
131 | (string-to-number | |
132 | (buffer-substring | |
133 | (point) (progn (forward-sexp 1) (point))))) | |
134 | (setq c (char-after)) | |
135 | (when (eq c ?*) | |
136 | (setq encoded t) | |
137 | (forward-char 1) | |
138 | (setq c (char-after))))) | |
139 | (setq number nil | |
140 | encoded nil)) | |
c96ec15a MB |
141 | (unless (eq c ?=) |
142 | (error "Invalid header: %s" string)) | |
c113de23 | 143 | (forward-char 1) |
c96ec15a MB |
144 | (setq c (char-after)) |
145 | (cond | |
146 | ((eq c ?\") | |
147 | (setq value (buffer-substring (1+ (point)) | |
148 | (progn | |
149 | (forward-sexp 1) | |
34128042 MB |
150 | (1- (point))))) |
151 | (when encoded | |
152 | (setq value (mapconcat (lambda (c) (format "%%%02x" c)) | |
153 | value "")))) | |
c96ec15a MB |
154 | ((and (or (memq c ttoken) |
155 | ;; EXTENSION: Support non-ascii chars. | |
156 | (> c ?\177)) | |
157 | (not (memq c stoken))) | |
158 | (setq value | |
159 | (buffer-substring | |
58090a8d MB |
160 | (point) |
161 | (progn | |
a367620f MB |
162 | ;; Jump over asterisk, non-ASCII |
163 | ;; and non-boundary characters. | |
164 | (while (and c | |
165 | (or (eq c ?*) | |
166 | (> c ?\177) | |
167 | (not (eq (char-syntax c) ? )))) | |
58090a8d | 168 | (forward-char 1) |
a367620f | 169 | (setq c (char-after))) |
58090a8d | 170 | (point))))) |
c96ec15a MB |
171 | (t |
172 | (error "Invalid header: %s" string))) | |
01c52d31 MB |
173 | (push (list attribute value number encoded) |
174 | parameters)))) | |
c96ec15a MB |
175 | (error |
176 | (setq parameters nil) | |
01c52d31 MB |
177 | (when signal-error |
178 | (signal (car err) (cdr err))))) | |
179 | ||
180 | ;; Now collect and concatenate continuation parameters. | |
181 | (let ((cparams nil) | |
182 | elem) | |
183 | (loop for (attribute value part encoded) | |
184 | in (sort parameters (lambda (e1 e2) | |
185 | (< (or (caddr e1) 0) | |
186 | (or (caddr e2) 0)))) | |
ab67634f G |
187 | do (cond |
188 | ;; First part. | |
189 | ((or (not (setq elem (assq attribute cparams))) | |
190 | (and (numberp part) | |
191 | (zerop part))) | |
192 | (push (list attribute value encoded) cparams)) | |
193 | ;; Repetition of a part; do nothing. | |
194 | ((and elem | |
195 | (null number)) | |
196 | ) | |
197 | ;; Concatenate continuation parts. | |
198 | (t | |
199 | (setcar (cdr elem) (concat (cadr elem) value))))) | |
01c52d31 MB |
200 | ;; Finally decode encoded values. |
201 | (cons type (mapcar | |
202 | (lambda (elem) | |
203 | (cons (car elem) | |
204 | (if (nth 2 elem) | |
205 | (rfc2231-decode-encoded-string (nth 1 elem)) | |
206 | (nth 1 elem)))) | |
207 | (nreverse cparams)))))))) | |
c113de23 GM |
208 | |
209 | (defun rfc2231-decode-encoded-string (string) | |
210 | "Decode an RFC2231-encoded string. | |
34128042 MB |
211 | These look like: |
212 | \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
213 | \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
214 | \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
215 | \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or | |
216 | \"This is ***fun***\"." | |
01c52d31 | 217 | (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string) |
07178229 KY |
218 | (let ((coding-system (mm-charset-to-coding-system |
219 | (match-string 1 string) nil t)) | |
01c52d31 MB |
220 | ;;(language (match-string 2 string)) |
221 | (value (match-string 3 string))) | |
e9e33c19 | 222 | (mm-with-unibyte-buffer |
34128042 | 223 | (insert value) |
c113de23 | 224 | (goto-char (point-min)) |
656b4fa1 | 225 | (while (re-search-forward "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" nil t) |
c113de23 GM |
226 | (insert |
227 | (prog1 | |
656b4fa1 MB |
228 | (string-to-number (match-string 1) 16) |
229 | (delete-region (match-beginning 0) (match-end 0))))) | |
34128042 | 230 | ;; Decode using the charset, if any. |
e9e33c19 KH |
231 | (if (memq coding-system '(nil ascii)) |
232 | (buffer-string) | |
233 | (mm-decode-coding-string (buffer-string) coding-system))))) | |
c113de23 GM |
234 | |
235 | (defun rfc2231-encode-string (param value) | |
c96ec15a MB |
236 | "Return and PARAM=VALUE string encoded according to RFC2231. |
237 | Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert | |
238 | the result of this function." | |
c113de23 GM |
239 | (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token)) |
240 | (tspecial (ietf-drums-token-to-list ietf-drums-tspecials)) | |
241 | (special (ietf-drums-token-to-list "*'%\n\t")) | |
242 | (ascii (ietf-drums-token-to-list ietf-drums-text-token)) | |
243 | (num -1) | |
c96ec15a | 244 | ;; Don't make lines exceeding 76 column. |
6203370b | 245 | (limit (- 74 (length param))) |
c113de23 | 246 | spacep encodep charsetp charset broken) |
e9e33c19 | 247 | (mm-with-multibyte-buffer |
c113de23 GM |
248 | (insert value) |
249 | (goto-char (point-min)) | |
250 | (while (not (eobp)) | |
251 | (cond | |
252 | ((or (memq (following-char) control) | |
253 | (memq (following-char) tspecial) | |
254 | (memq (following-char) special)) | |
255 | (setq encodep t)) | |
256 | ((eq (following-char) ? ) | |
257 | (setq spacep t)) | |
258 | ((not (memq (following-char) ascii)) | |
259 | (setq charsetp t))) | |
260 | (forward-char 1)) | |
261 | (when charsetp | |
262 | (setq charset (mm-encode-body))) | |
4f162824 | 263 | (mm-disable-multibyte) |
c113de23 | 264 | (cond |
6203370b MB |
265 | ((or encodep charsetp |
266 | (progn | |
267 | (end-of-line) | |
268 | (> (current-column) (if spacep (- limit 2) limit)))) | |
269 | (setq limit (- limit 6)) | |
c113de23 | 270 | (goto-char (point-min)) |
6203370b | 271 | (insert (symbol-name (or charset 'us-ascii)) "''") |
c113de23 | 272 | (while (not (eobp)) |
c113de23 GM |
273 | (if (or (not (memq (following-char) ascii)) |
274 | (memq (following-char) control) | |
275 | (memq (following-char) tspecial) | |
276 | (memq (following-char) special) | |
277 | (eq (following-char) ? )) | |
278 | (progn | |
6203370b MB |
279 | (when (>= (current-column) (1- limit)) |
280 | (insert ";\n") | |
281 | (setq broken t)) | |
c113de23 GM |
282 | (insert "%" (format "%02x" (following-char))) |
283 | (delete-char 1)) | |
6203370b MB |
284 | (when (> (current-column) limit) |
285 | (insert ";\n") | |
286 | (setq broken t)) | |
c113de23 GM |
287 | (forward-char 1))) |
288 | (goto-char (point-min)) | |
c113de23 GM |
289 | (if (not broken) |
290 | (insert param "*=") | |
291 | (while (not (eobp)) | |
c96ec15a | 292 | (insert (if (>= num 0) " " "") |
23f87bed | 293 | param "*" (format "%d" (incf num)) "*=") |
c113de23 GM |
294 | (forward-line 1)))) |
295 | (spacep | |
296 | (goto-char (point-min)) | |
34128042 | 297 | (insert param "=\"") |
c113de23 GM |
298 | (goto-char (point-max)) |
299 | (insert "\"")) | |
300 | (t | |
301 | (goto-char (point-min)) | |
34128042 | 302 | (insert param "="))) |
c113de23 GM |
303 | (buffer-string)))) |
304 | ||
305 | (provide 'rfc2231) | |
306 | ||
307 | ;;; rfc2231.el ends here |