Commit | Line | Data |
---|---|---|
23f87bed | 1 | ;;; rfc2231.el --- Functions for decoding rfc2231 headers |
c113de23 | 2 | |
61e1e4e8 | 3 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, |
e3fe4da0 | 4 | ;; 2006, 2007, 2008 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; This file is part of GNU Emacs. | |
8 | ||
9 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
10 | ;; it under the terms of the GNU General Public License as published by | |
5a9dffec | 11 | ;; the Free Software Foundation; either version 3, or (at your option) |
c113de23 GM |
12 | ;; any later version. |
13 | ||
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | ;; GNU General Public License for more details. | |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
20 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
21 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
22 | ;; Boston, MA 02110-1301, USA. | |
c113de23 GM |
23 | |
24 | ;;; Commentary: | |
25 | ||
26 | ;;; Code: | |
27 | ||
e017ba35 | 28 | (eval-when-compile (require 'cl)) |
c113de23 | 29 | (require 'ietf-drums) |
23f87bed MB |
30 | (require 'rfc2047) |
31 | (autoload 'mm-encode-body "mm-bodies") | |
32 | (autoload 'mail-header-remove-whitespace "mail-parse") | |
33 | (autoload 'mail-header-remove-comments "mail-parse") | |
c113de23 GM |
34 | |
35 | (defun rfc2231-get-value (ct attribute) | |
36 | "Return the value of ATTRIBUTE from CT." | |
37 | (cdr (assq attribute (cdr ct)))) | |
38 | ||
23f87bed MB |
39 | (defun rfc2231-parse-qp-string (string) |
40 | "Parse QP-encoded string using `rfc2231-parse-string'. | |
41 | N.B. This is in violation with RFC2047, but it seem to be in common use." | |
42 | (rfc2231-parse-string (rfc2047-decode-string string))) | |
43 | ||
c96ec15a | 44 | (defun rfc2231-parse-string (string &optional signal-error) |
c113de23 GM |
45 | "Parse STRING and return a list. |
46 | The list will be on the form | |
c96ec15a MB |
47 | `(name (attribute . value) (attribute . value)...)'. |
48 | ||
49 | If the optional SIGNAL-ERROR is non-nil, signal an error when this | |
cf5a5c38 MB |
50 | function fails in parsing of parameters. Otherwise, this function |
51 | must never cause a Lisp error." | |
c113de23 GM |
52 | (with-temp-buffer |
53 | (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token)) | |
54 | (stoken (ietf-drums-token-to-list ietf-drums-tspecials)) | |
55 | (ntoken (ietf-drums-token-to-list "0-9")) | |
01c52d31 | 56 | c type attribute encoded number parameters value) |
cf5a5c38 MB |
57 | (ietf-drums-init |
58 | (condition-case nil | |
59 | (mail-header-remove-whitespace | |
60 | (mail-header-remove-comments string)) | |
61 | ;; The most likely cause of an error is unbalanced parentheses | |
62 | ;; or double-quotes. If all parentheses and double-quotes are | |
63 | ;; quoted meaninglessly with backslashes, removing them might | |
64 | ;; make it parseable. Let's try... | |
65 | (error | |
66 | (let (mod) | |
67 | (when (and (string-match "\\\\\"" string) | |
68 | (not (string-match "\\`\"\\|[^\\]\"" string))) | |
69 | (setq string (mm-replace-in-string string "\\\\\"" "\"") | |
70 | mod t)) | |
71 | (when (and (string-match "\\\\(" string) | |
72 | (string-match "\\\\)" string) | |
73 | (not (string-match "\\`(\\|[^\\][()]" string))) | |
74 | (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1") | |
75 | mod t)) | |
76 | (or (and mod | |
77 | (ignore-errors | |
78 | (mail-header-remove-whitespace | |
79 | (mail-header-remove-comments string)))) | |
80 | ;; Finally, attempt to extract only type. | |
81 | (if (string-match | |
82 | (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+" | |
01c52d31 MB |
83 | "\\(?:/[^" ietf-drums-tspecials |
84 | "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)") | |
cf5a5c38 MB |
85 | string) |
86 | (match-string 1 string) | |
87 | "")))))) | |
c113de23 GM |
88 | (let ((table (copy-syntax-table ietf-drums-syntax-table))) |
89 | (modify-syntax-entry ?\' "w" table) | |
23f87bed MB |
90 | (modify-syntax-entry ?* " " table) |
91 | (modify-syntax-entry ?\; " " table) | |
92 | (modify-syntax-entry ?= " " table) | |
c113de23 GM |
93 | ;; The following isn't valid, but one should be liberal |
94 | ;; in what one receives. | |
95 | (modify-syntax-entry ?\: "w" table) | |
96 | (set-syntax-table table)) | |
97 | (setq c (char-after)) | |
98 | (when (and (memq c ttoken) | |
cf5a5c38 MB |
99 | (not (memq c stoken)) |
100 | (setq type (ignore-errors | |
101 | (downcase | |
102 | (buffer-substring (point) (progn | |
103 | (forward-sexp 1) | |
104 | (point))))))) | |
c113de23 | 105 | ;; Do the params |
c96ec15a MB |
106 | (condition-case err |
107 | (progn | |
108 | (while (not (eobp)) | |
c113de23 | 109 | (setq c (char-after)) |
c96ec15a MB |
110 | (unless (eq c ?\;) |
111 | (error "Invalid header: %s" string)) | |
112 | (forward-char 1) | |
113 | ;; If c in nil, then this is an invalid header, but | |
114 | ;; since elm generates invalid headers on this form, | |
115 | ;; we allow it. | |
116 | (when (setq c (char-after)) | |
117 | (if (and (memq c ttoken) | |
118 | (not (memq c stoken))) | |
119 | (setq attribute | |
120 | (intern | |
121 | (downcase | |
122 | (buffer-substring | |
123 | (point) (progn (forward-sexp 1) (point)))))) | |
124 | (error "Invalid header: %s" string)) | |
125 | (setq c (char-after)) | |
34128042 MB |
126 | (if (eq c ?*) |
127 | (progn | |
c96ec15a | 128 | (forward-char 1) |
34128042 MB |
129 | (setq c (char-after)) |
130 | (if (not (memq c ntoken)) | |
131 | (setq encoded t | |
132 | number nil) | |
133 | (setq number | |
134 | (string-to-number | |
135 | (buffer-substring | |
136 | (point) (progn (forward-sexp 1) (point))))) | |
137 | (setq c (char-after)) | |
138 | (when (eq c ?*) | |
139 | (setq encoded t) | |
140 | (forward-char 1) | |
141 | (setq c (char-after))))) | |
142 | (setq number nil | |
143 | encoded nil)) | |
c96ec15a MB |
144 | (unless (eq c ?=) |
145 | (error "Invalid header: %s" string)) | |
c113de23 | 146 | (forward-char 1) |
c96ec15a MB |
147 | (setq c (char-after)) |
148 | (cond | |
149 | ((eq c ?\") | |
150 | (setq value (buffer-substring (1+ (point)) | |
151 | (progn | |
152 | (forward-sexp 1) | |
34128042 MB |
153 | (1- (point))))) |
154 | (when encoded | |
155 | (setq value (mapconcat (lambda (c) (format "%%%02x" c)) | |
156 | value "")))) | |
c96ec15a MB |
157 | ((and (or (memq c ttoken) |
158 | ;; EXTENSION: Support non-ascii chars. | |
159 | (> c ?\177)) | |
160 | (not (memq c stoken))) | |
161 | (setq value | |
162 | (buffer-substring | |
58090a8d MB |
163 | (point) |
164 | (progn | |
a367620f MB |
165 | ;; Jump over asterisk, non-ASCII |
166 | ;; and non-boundary characters. | |
167 | (while (and c | |
168 | (or (eq c ?*) | |
169 | (> c ?\177) | |
170 | (not (eq (char-syntax c) ? )))) | |
58090a8d | 171 | (forward-char 1) |
a367620f | 172 | (setq c (char-after))) |
58090a8d | 173 | (point))))) |
c96ec15a MB |
174 | (t |
175 | (error "Invalid header: %s" string))) | |
01c52d31 MB |
176 | (push (list attribute value number encoded) |
177 | parameters)))) | |
c96ec15a MB |
178 | (error |
179 | (setq parameters nil) | |
01c52d31 MB |
180 | (when signal-error |
181 | (signal (car err) (cdr err))))) | |
182 | ||
183 | ;; Now collect and concatenate continuation parameters. | |
184 | (let ((cparams nil) | |
185 | elem) | |
186 | (loop for (attribute value part encoded) | |
187 | in (sort parameters (lambda (e1 e2) | |
188 | (< (or (caddr e1) 0) | |
189 | (or (caddr e2) 0)))) | |
190 | do (if (or (not (setq elem (assq attribute cparams))) | |
191 | (and (numberp part) | |
192 | (zerop part))) | |
193 | (push (list attribute value encoded) cparams) | |
194 | (setcar (cdr elem) (concat (cadr elem) value)))) | |
195 | ;; Finally decode encoded values. | |
196 | (cons type (mapcar | |
197 | (lambda (elem) | |
198 | (cons (car elem) | |
199 | (if (nth 2 elem) | |
200 | (rfc2231-decode-encoded-string (nth 1 elem)) | |
201 | (nth 1 elem)))) | |
202 | (nreverse cparams)))))))) | |
c113de23 GM |
203 | |
204 | (defun rfc2231-decode-encoded-string (string) | |
205 | "Decode an RFC2231-encoded string. | |
34128042 MB |
206 | These look like: |
207 | \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
208 | \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
209 | \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\", | |
210 | \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or | |
211 | \"This is ***fun***\"." | |
01c52d31 MB |
212 | (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string) |
213 | (let ((coding-system (mm-charset-to-coding-system (match-string 1 string))) | |
214 | ;;(language (match-string 2 string)) | |
215 | (value (match-string 3 string))) | |
e9e33c19 | 216 | (mm-with-unibyte-buffer |
34128042 | 217 | (insert value) |
c113de23 GM |
218 | (goto-char (point-min)) |
219 | (while (search-forward "%" nil t) | |
220 | (insert | |
221 | (prog1 | |
222 | (string-to-number (buffer-substring (point) (+ (point) 2)) 16) | |
223 | (delete-region (1- (point)) (+ (point) 2))))) | |
34128042 | 224 | ;; Decode using the charset, if any. |
e9e33c19 KH |
225 | (if (memq coding-system '(nil ascii)) |
226 | (buffer-string) | |
227 | (mm-decode-coding-string (buffer-string) coding-system))))) | |
c113de23 GM |
228 | |
229 | (defun rfc2231-encode-string (param value) | |
c96ec15a MB |
230 | "Return and PARAM=VALUE string encoded according to RFC2231. |
231 | Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert | |
232 | the result of this function." | |
c113de23 GM |
233 | (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token)) |
234 | (tspecial (ietf-drums-token-to-list ietf-drums-tspecials)) | |
235 | (special (ietf-drums-token-to-list "*'%\n\t")) | |
236 | (ascii (ietf-drums-token-to-list ietf-drums-text-token)) | |
237 | (num -1) | |
c96ec15a | 238 | ;; Don't make lines exceeding 76 column. |
6203370b | 239 | (limit (- 74 (length param))) |
c113de23 | 240 | spacep encodep charsetp charset broken) |
e9e33c19 | 241 | (mm-with-multibyte-buffer |
c113de23 GM |
242 | (insert value) |
243 | (goto-char (point-min)) | |
244 | (while (not (eobp)) | |
245 | (cond | |
246 | ((or (memq (following-char) control) | |
247 | (memq (following-char) tspecial) | |
248 | (memq (following-char) special)) | |
249 | (setq encodep t)) | |
250 | ((eq (following-char) ? ) | |
251 | (setq spacep t)) | |
252 | ((not (memq (following-char) ascii)) | |
253 | (setq charsetp t))) | |
254 | (forward-char 1)) | |
255 | (when charsetp | |
256 | (setq charset (mm-encode-body))) | |
4f162824 | 257 | (mm-disable-multibyte) |
c113de23 | 258 | (cond |
6203370b MB |
259 | ((or encodep charsetp |
260 | (progn | |
261 | (end-of-line) | |
262 | (> (current-column) (if spacep (- limit 2) limit)))) | |
263 | (setq limit (- limit 6)) | |
c113de23 | 264 | (goto-char (point-min)) |
6203370b | 265 | (insert (symbol-name (or charset 'us-ascii)) "''") |
c113de23 | 266 | (while (not (eobp)) |
c113de23 GM |
267 | (if (or (not (memq (following-char) ascii)) |
268 | (memq (following-char) control) | |
269 | (memq (following-char) tspecial) | |
270 | (memq (following-char) special) | |
271 | (eq (following-char) ? )) | |
272 | (progn | |
6203370b MB |
273 | (when (>= (current-column) (1- limit)) |
274 | (insert ";\n") | |
275 | (setq broken t)) | |
c113de23 GM |
276 | (insert "%" (format "%02x" (following-char))) |
277 | (delete-char 1)) | |
6203370b MB |
278 | (when (> (current-column) limit) |
279 | (insert ";\n") | |
280 | (setq broken t)) | |
c113de23 GM |
281 | (forward-char 1))) |
282 | (goto-char (point-min)) | |
c113de23 GM |
283 | (if (not broken) |
284 | (insert param "*=") | |
285 | (while (not (eobp)) | |
c96ec15a | 286 | (insert (if (>= num 0) " " "") |
23f87bed | 287 | param "*" (format "%d" (incf num)) "*=") |
c113de23 GM |
288 | (forward-line 1)))) |
289 | (spacep | |
290 | (goto-char (point-min)) | |
34128042 | 291 | (insert param "=\"") |
c113de23 GM |
292 | (goto-char (point-max)) |
293 | (insert "\"")) | |
294 | (t | |
295 | (goto-char (point-min)) | |
34128042 | 296 | (insert param "="))) |
c113de23 GM |
297 | (buffer-string)))) |
298 | ||
299 | (provide 'rfc2231) | |
300 | ||
cbee283d | 301 | ;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63 |
c113de23 | 302 | ;;; rfc2231.el ends here |