Fix up comment convention on the arch-tag lines.
[bpt/emacs.git] / lisp / gnus / rfc2231.el
CommitLineData
23f87bed 1;;; rfc2231.el --- Functions for decoding rfc2231 headers
c113de23 2
61e1e4e8 3;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
e3fe4da0 4;; 2006, 2007, 2008 Free Software Foundation, Inc.
c113de23
GM
5
6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7;; This file is part of GNU Emacs.
8
9;; GNU Emacs is free software; you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
5a9dffec 11;; the Free Software Foundation; either version 3, or (at your option)
c113de23
GM
12;; any later version.
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
21;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22;; Boston, MA 02110-1301, USA.
c113de23
GM
23
24;;; Commentary:
25
26;;; Code:
27
e017ba35 28(eval-when-compile (require 'cl))
c113de23 29(require 'ietf-drums)
23f87bed
MB
30(require 'rfc2047)
31(autoload 'mm-encode-body "mm-bodies")
32(autoload 'mail-header-remove-whitespace "mail-parse")
33(autoload 'mail-header-remove-comments "mail-parse")
c113de23
GM
34
35(defun rfc2231-get-value (ct attribute)
36 "Return the value of ATTRIBUTE from CT."
37 (cdr (assq attribute (cdr ct))))
38
23f87bed
MB
39(defun rfc2231-parse-qp-string (string)
40 "Parse QP-encoded string using `rfc2231-parse-string'.
41N.B. This is in violation with RFC2047, but it seem to be in common use."
42 (rfc2231-parse-string (rfc2047-decode-string string)))
43
c96ec15a 44(defun rfc2231-parse-string (string &optional signal-error)
c113de23
GM
45 "Parse STRING and return a list.
46The list will be on the form
c96ec15a
MB
47 `(name (attribute . value) (attribute . value)...)'.
48
49If the optional SIGNAL-ERROR is non-nil, signal an error when this
cf5a5c38
MB
50function fails in parsing of parameters. Otherwise, this function
51must never cause a Lisp error."
c113de23
GM
52 (with-temp-buffer
53 (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
54 (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
55 (ntoken (ietf-drums-token-to-list "0-9"))
01c52d31 56 c type attribute encoded number parameters value)
cf5a5c38
MB
57 (ietf-drums-init
58 (condition-case nil
59 (mail-header-remove-whitespace
60 (mail-header-remove-comments string))
61 ;; The most likely cause of an error is unbalanced parentheses
62 ;; or double-quotes. If all parentheses and double-quotes are
63 ;; quoted meaninglessly with backslashes, removing them might
64 ;; make it parseable. Let's try...
65 (error
66 (let (mod)
67 (when (and (string-match "\\\\\"" string)
68 (not (string-match "\\`\"\\|[^\\]\"" string)))
69 (setq string (mm-replace-in-string string "\\\\\"" "\"")
70 mod t))
71 (when (and (string-match "\\\\(" string)
72 (string-match "\\\\)" string)
73 (not (string-match "\\`(\\|[^\\][()]" string)))
74 (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1")
75 mod t))
76 (or (and mod
77 (ignore-errors
78 (mail-header-remove-whitespace
79 (mail-header-remove-comments string))))
80 ;; Finally, attempt to extract only type.
81 (if (string-match
82 (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+"
01c52d31
MB
83 "\\(?:/[^" ietf-drums-tspecials
84 "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)")
cf5a5c38
MB
85 string)
86 (match-string 1 string)
87 ""))))))
c113de23
GM
88 (let ((table (copy-syntax-table ietf-drums-syntax-table)))
89 (modify-syntax-entry ?\' "w" table)
23f87bed
MB
90 (modify-syntax-entry ?* " " table)
91 (modify-syntax-entry ?\; " " table)
92 (modify-syntax-entry ?= " " table)
c113de23
GM
93 ;; The following isn't valid, but one should be liberal
94 ;; in what one receives.
95 (modify-syntax-entry ?\: "w" table)
96 (set-syntax-table table))
97 (setq c (char-after))
98 (when (and (memq c ttoken)
cf5a5c38
MB
99 (not (memq c stoken))
100 (setq type (ignore-errors
101 (downcase
102 (buffer-substring (point) (progn
103 (forward-sexp 1)
104 (point)))))))
c113de23 105 ;; Do the params
c96ec15a
MB
106 (condition-case err
107 (progn
108 (while (not (eobp))
c113de23 109 (setq c (char-after))
c96ec15a
MB
110 (unless (eq c ?\;)
111 (error "Invalid header: %s" string))
112 (forward-char 1)
113 ;; If c in nil, then this is an invalid header, but
114 ;; since elm generates invalid headers on this form,
115 ;; we allow it.
116 (when (setq c (char-after))
117 (if (and (memq c ttoken)
118 (not (memq c stoken)))
119 (setq attribute
120 (intern
121 (downcase
122 (buffer-substring
123 (point) (progn (forward-sexp 1) (point))))))
124 (error "Invalid header: %s" string))
125 (setq c (char-after))
34128042
MB
126 (if (eq c ?*)
127 (progn
c96ec15a 128 (forward-char 1)
34128042
MB
129 (setq c (char-after))
130 (if (not (memq c ntoken))
131 (setq encoded t
132 number nil)
133 (setq number
134 (string-to-number
135 (buffer-substring
136 (point) (progn (forward-sexp 1) (point)))))
137 (setq c (char-after))
138 (when (eq c ?*)
139 (setq encoded t)
140 (forward-char 1)
141 (setq c (char-after)))))
142 (setq number nil
143 encoded nil))
c96ec15a
MB
144 (unless (eq c ?=)
145 (error "Invalid header: %s" string))
c113de23 146 (forward-char 1)
c96ec15a
MB
147 (setq c (char-after))
148 (cond
149 ((eq c ?\")
150 (setq value (buffer-substring (1+ (point))
151 (progn
152 (forward-sexp 1)
34128042
MB
153 (1- (point)))))
154 (when encoded
155 (setq value (mapconcat (lambda (c) (format "%%%02x" c))
156 value ""))))
c96ec15a
MB
157 ((and (or (memq c ttoken)
158 ;; EXTENSION: Support non-ascii chars.
159 (> c ?\177))
160 (not (memq c stoken)))
161 (setq value
162 (buffer-substring
58090a8d
MB
163 (point)
164 (progn
a367620f
MB
165 ;; Jump over asterisk, non-ASCII
166 ;; and non-boundary characters.
167 (while (and c
168 (or (eq c ?*)
169 (> c ?\177)
170 (not (eq (char-syntax c) ? ))))
58090a8d 171 (forward-char 1)
a367620f 172 (setq c (char-after)))
58090a8d 173 (point)))))
c96ec15a
MB
174 (t
175 (error "Invalid header: %s" string)))
01c52d31
MB
176 (push (list attribute value number encoded)
177 parameters))))
c96ec15a
MB
178 (error
179 (setq parameters nil)
01c52d31
MB
180 (when signal-error
181 (signal (car err) (cdr err)))))
182
183 ;; Now collect and concatenate continuation parameters.
184 (let ((cparams nil)
185 elem)
186 (loop for (attribute value part encoded)
187 in (sort parameters (lambda (e1 e2)
188 (< (or (caddr e1) 0)
189 (or (caddr e2) 0))))
190 do (if (or (not (setq elem (assq attribute cparams)))
191 (and (numberp part)
192 (zerop part)))
193 (push (list attribute value encoded) cparams)
194 (setcar (cdr elem) (concat (cadr elem) value))))
195 ;; Finally decode encoded values.
196 (cons type (mapcar
197 (lambda (elem)
198 (cons (car elem)
199 (if (nth 2 elem)
200 (rfc2231-decode-encoded-string (nth 1 elem))
201 (nth 1 elem))))
202 (nreverse cparams))))))))
c113de23
GM
203
204(defun rfc2231-decode-encoded-string (string)
205 "Decode an RFC2231-encoded string.
34128042
MB
206These look like:
207 \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
208 \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\",
209 \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
210 \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or
211 \"This is ***fun***\"."
01c52d31
MB
212 (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string)
213 (let ((coding-system (mm-charset-to-coding-system (match-string 1 string)))
214 ;;(language (match-string 2 string))
215 (value (match-string 3 string)))
e9e33c19 216 (mm-with-unibyte-buffer
34128042 217 (insert value)
c113de23
GM
218 (goto-char (point-min))
219 (while (search-forward "%" nil t)
220 (insert
221 (prog1
222 (string-to-number (buffer-substring (point) (+ (point) 2)) 16)
223 (delete-region (1- (point)) (+ (point) 2)))))
34128042 224 ;; Decode using the charset, if any.
e9e33c19
KH
225 (if (memq coding-system '(nil ascii))
226 (buffer-string)
227 (mm-decode-coding-string (buffer-string) coding-system)))))
c113de23
GM
228
229(defun rfc2231-encode-string (param value)
c96ec15a
MB
230 "Return and PARAM=VALUE string encoded according to RFC2231.
231Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert
232the result of this function."
c113de23
GM
233 (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
234 (tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
235 (special (ietf-drums-token-to-list "*'%\n\t"))
236 (ascii (ietf-drums-token-to-list ietf-drums-text-token))
237 (num -1)
c96ec15a 238 ;; Don't make lines exceeding 76 column.
6203370b 239 (limit (- 74 (length param)))
c113de23 240 spacep encodep charsetp charset broken)
e9e33c19 241 (mm-with-multibyte-buffer
c113de23
GM
242 (insert value)
243 (goto-char (point-min))
244 (while (not (eobp))
245 (cond
246 ((or (memq (following-char) control)
247 (memq (following-char) tspecial)
248 (memq (following-char) special))
249 (setq encodep t))
250 ((eq (following-char) ? )
251 (setq spacep t))
252 ((not (memq (following-char) ascii))
253 (setq charsetp t)))
254 (forward-char 1))
255 (when charsetp
256 (setq charset (mm-encode-body)))
4f162824 257 (mm-disable-multibyte)
c113de23 258 (cond
6203370b
MB
259 ((or encodep charsetp
260 (progn
261 (end-of-line)
262 (> (current-column) (if spacep (- limit 2) limit))))
263 (setq limit (- limit 6))
c113de23 264 (goto-char (point-min))
6203370b 265 (insert (symbol-name (or charset 'us-ascii)) "''")
c113de23 266 (while (not (eobp))
c113de23
GM
267 (if (or (not (memq (following-char) ascii))
268 (memq (following-char) control)
269 (memq (following-char) tspecial)
270 (memq (following-char) special)
271 (eq (following-char) ? ))
272 (progn
6203370b
MB
273 (when (>= (current-column) (1- limit))
274 (insert ";\n")
275 (setq broken t))
c113de23
GM
276 (insert "%" (format "%02x" (following-char)))
277 (delete-char 1))
6203370b
MB
278 (when (> (current-column) limit)
279 (insert ";\n")
280 (setq broken t))
c113de23
GM
281 (forward-char 1)))
282 (goto-char (point-min))
c113de23
GM
283 (if (not broken)
284 (insert param "*=")
285 (while (not (eobp))
c96ec15a 286 (insert (if (>= num 0) " " "")
23f87bed 287 param "*" (format "%d" (incf num)) "*=")
c113de23
GM
288 (forward-line 1))))
289 (spacep
290 (goto-char (point-min))
34128042 291 (insert param "=\"")
c113de23
GM
292 (goto-char (point-max))
293 (insert "\""))
294 (t
295 (goto-char (point-min))
34128042 296 (insert param "=")))
c113de23
GM
297 (buffer-string))))
298
299(provide 'rfc2231)
300
cbee283d 301;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63
c113de23 302;;; rfc2231.el ends here