Commit | Line | Data |
---|---|---|
c88ab9ce ER |
1 | ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike |
2 | ||
f2e3589a | 3 | ;; Copyright (C) 1986, 1987, 1990, 2001, 2002, 2003, 2004, |
2f043267 | 4 | ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
9750e079 | 5 | |
4821e2af ER |
6 | ;; Author: Richard Mlynarik <mly@eddie.mit.edu> |
7 | ;; Maintainer: FSF | |
d7b4d18f | 8 | ;; Keywords: mail |
4821e2af | 9 | |
cedaf3aa RS |
10 | ;; This file is part of GNU Emacs. |
11 | ||
12 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
ceaeecb0 | 14 | ;; the Free Software Foundation; either version 3, or (at your option) |
cedaf3aa RS |
15 | ;; any later version. |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
b578f267 | 23 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
3a35cf56 LK |
24 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
25 | ;; Boston, MA 02110-1301, USA. | |
cedaf3aa | 26 | |
d9ecc911 ER |
27 | ;;; Commentary: |
28 | ||
29 | ;; Support functions for parsing RFC-822 headers, used by mail and news | |
a1506d29 | 30 | ;; modes. |
d9ecc911 | 31 | |
4821e2af ER |
32 | ;;; Code: |
33 | ||
d6b4d749 RS |
;; Buffer position at which the address currently being parsed begins.
;; Declared here without a value; `rfc822-addresses' sets it before each
;; address and `rfc822-addresses-1' rebinds it with `let', while
;; `rfc822-bad-address' reads it to find the text it should report.
(defvar rfc822-address-start)
35 | ||
36 | ;; uses rfc822-address-start free, throws to address | |
cedaf3aa RS |
(defun rfc822-bad-address (reason)
  "Turn the address being parsed into an error comment and abort the parse.
REASON is a string saying what is wrong.  \"_^_\" is inserted at point to
mark where parsing stopped; then the text from `rfc822-address-start' up to
the next \",\" or \";\" (or the end of the buffer) is rewritten in place as
  (Unparsable address -- REASON: \"TEXT\")
with backslashes, parentheses and newlines in TEXT backslash-quoted.
This function does not return normally: it throws the rewritten text to
the `address' catch tag."
  (save-restriction
    ;; Leave a marker showing where the parser gave up.
    (insert "_^_")
    (let ((limit (if (re-search-forward "[,;]" nil t)
                     ;; Stop just before the separator that was found.
                     (max (point-min) (1- (point)))
                   (point-max))))
      (narrow-to-region rfc822-address-start limit))
    ;; Make the offending text suitable for inclusion in (...).
    ;; The backslash is handled first so that the backslashes added for
    ;; the other characters are not themselves quoted again.
    (dolist (loser '("\\" "(" ")" "\n"))
      (goto-char (point-min))
      (while (search-forward loser nil t)
        (goto-char (match-beginning 0))
        (insert ?\\)
        (forward-char 1)))
    (goto-char (point-min))
    (insert "(Unparsable address -- " reason ": \"")
    (goto-char (point-max))
    (insert "\")"))
  (rfc822-nuke-whitespace)
  (throw 'address (buffer-substring rfc822-address-start (point))))
cedaf3aa RS |
59 | |
(defun rfc822-nuke-whitespace (&optional leave-space)
  "Delete whitespace and parenthesized comments starting at point.
Spaces, tabs, newlines and \"(...)\" comments (which may nest and may
contain backslash-quoted characters) are removed until some other
character or the end of the buffer is reached.
If LEAVE-SPACE is non-nil, a single space is then inserted at point unless
point is at either end of the buffer or already follows a space.
A comment that is still open at the end of the buffer, or a backslash that
is the last character of the buffer, is reported via `rfc822-bad-address'."
  (let (c)
    (while (and (not (eobp))
                (progn
                  (setq c (following-char))
                  (cond
                   ((= c ?\()
                    (forward-char 1)
                    ;; Empty out the comment body piece by piece until
                    ;; point sits on the closing paren.
                    (while (progn
                             (when (eobp)
                               (rfc822-bad-address "Unbalanced comment (...)"))
                             (/= (setq c (following-char)) ?\)))
                      (cond ((looking-at "[^()\\]+")
                             ;; Run of ordinary comment text.
                             (replace-match ""))
                            ((= c ?\()
                             ;; Nested comment.
                             (rfc822-nuke-whitespace))
                            ((< (point) (1- (point-max)))
                             ;; Backslash plus the character it quotes.
                             (delete-char 2))
                            (t
                             (rfc822-bad-address "orphaned backslash"))))
                    ;; Only "()" is left of the comment; remove it.
                    (forward-char -1)
                    (delete-char 2)
                    t)
                   ((memq c '(?\s ?\t ?\n))
                    (let ((from (point)))
                      (skip-chars-forward " \t\n")
                      (delete-region from (point)))
                    t)))))
    ;; Returns nil when a space had to be inserted, t otherwise.
    (if (and leave-space
             (not (eobp))
             (not (bobp))
             (/= (preceding-char) ?\s))
        (insert ?\s)
      t)))
92 | ||
(defun rfc822-looking-at (regex &optional leave-space)
  "If the text at point matches REGEX, move over it and return t.
REGEX is either a regular expression string, matched with `looking-at',
or a single character, compared with the character after point.
On a match, point moves past the matched text and the whitespace and
comments that follow are deleted by `rfc822-nuke-whitespace', to which
LEAVE-SPACE is passed; the match data is the same afterwards as it was
just before that cleanup.  Return nil, without moving, if there is no
match."
  (when (if (stringp regex)
            (when (looking-at regex)
              (goto-char (match-end 0))
              t)
          (when (and (not (eobp))
                     (= (following-char) regex))
            (forward-char 1)
            t))
    ;; The cleanup may search; put the match data back afterwards so
    ;; callers can still use it.
    (let ((saved (match-data)))
      (rfc822-nuke-whitespace leave-space)
      (set-match-data saved))
    t))
107 | ||
(defun rfc822-snarf-word ()
  "Move point over one RFC 822 word, i.e. an atom or a quoted-string.
Return t on success.  Text that is neither is reported through
`rfc822-bad-address'."
  ;; word is atom | quoted-string
  (if (= (following-char) ?\")
      ;; quoted-string
      (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
          (rfc822-bad-address "Unterminated quoted string"))
    ;; atom
    (or (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in address"))))
119 | ||
(defun rfc822-snarf-words ()
  "Move point over a sequence of words separated by \".\" characters.
At least one word is required; see `rfc822-snarf-word'."
  (let ((more t))
    (while more
      (rfc822-snarf-word)
      ;; Another word must follow each dot.
      (setq more (rfc822-looking-at ?.)))))
124 | ||
(defun rfc822-snarf-subdomain ()
  "Move point over one RFC 822 sub-domain.
A sub-domain is either a domain-literal, written in square brackets, or a
domain-ref, which is an atom.  Return t on success.  Anything else is
reported through `rfc822-bad-address'."
  ;; sub-domain is domain-ref | domain-literal
  (if (= (following-char) ?\[)
      ;; domain-literal
      (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
          (rfc822-bad-address "Unterminated domain literal [...]"))
    ;; domain-ref = atom
    (or (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in host/domain specification"))))
136 | ||
(defun rfc822-snarf-domain ()
  "Move point over a domain: sub-domains separated by \".\" characters.
At least one sub-domain is required; see `rfc822-snarf-subdomain'."
  (let ((more t))
    (while more
      (rfc822-snarf-subdomain)
      ;; Another sub-domain must follow each dot.
      (setq more (rfc822-looking-at ?.)))))
141 | ||
(defun rfc822-snarf-frob-list (name separator terminator snarfer
                                    &optional return)
  "Parse a SEPARATOR-separated list of items that ends with TERMINATOR.
SEPARATOR and TERMINATOR are handed to `rfc822-looking-at'.  SNARFER is
called with no arguments, with point at the start of each item, and must
move point over that item.  NAME is a string used in error messages.
If RETURN is non-nil, return the non-nil values of SNARFER in the order
they were produced, splicing in any value that is itself a list;
otherwise return nil.
Reaching the end of the buffer before TERMINATOR, or finding two items
with no SEPARATOR between them, is reported through `rfc822-bad-address'."
  (let ((at-start t)
        (acc ())
        item)
    (while (progn
             (when (eobp)
               (rfc822-bad-address
                (format "End of addresses in middle of %s" name)))
             (cond ((rfc822-looking-at terminator)
                    nil)
                   ((rfc822-looking-at separator)
                    ;; multiple separators are allowed and do nothing.
                    (while (rfc822-looking-at separator))
                    t)
                   ;; No separator is needed in front of the first item.
                   (at-start
                    t)
                   (t
                    (rfc822-bad-address
                     (format "Gubbish in middle of %s" name)))))
      (setq item (funcall snarfer)
            at-start nil)
      ;; ACC is kept in reverse order; a list value is reversed (as a
      ;; copy) before being put in front so the final `nreverse'
      ;; restores its order too.
      (when (and return item)
        (setq acc (if (listp item)
                      (nconc (reverse item) acc)
                    (cons item acc)))))
    (nreverse acc)))
168 | ||
169 | ;; return either an address (a string) or a list of addresses | |
(defun rfc822-addresses-1 (&optional allow-groups)
  "Parse one RFC 822 `address' starting at point.
Return a string for a single mailbox, a list for a \"name: ...;\" group,
or nil if no address text is found at point.  A group is accepted only if
ALLOW-GROUPS is non-nil.  Input that cannot be parsed yields the error
comment string produced by `rfc822-bad-address'.
`rfc822-address-start' is bound to the value of point on entry."
  ;; Looking for an rfc822 `address'
  ;; Either a group (1*word ":" [#mailbox] ";")
  ;; or a mailbox (addr-spec | 1*word route-addr)
  ;; addr-spec is (local-part "@" domain)
  ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
  ;; local-part is (word *("." word))
  ;; word is (atom | quoted-string)
  ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
  ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
  ;; domain is sub-domain *("." sub-domain)
  ;; sub-domain is domain-ref | domain-literal
  ;; domain-literal is "[" *(dtext | quoted-pair) "]"
  ;; dtext is "[^][\\n"
  ;; domain-ref is atom
  (let ((rfc822-address-start (point))
        ;; Number of words (dotted word sequences) read so far.
        (n 0))
    (catch 'address
      ;; optimize common cases:
      ;;  foo
      ;;  foo.bar@bar.zap
      ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
      ;; other common cases are:
      ;;  foo bar <foo.bar@baz.zap>
      ;;  "foo bar" <foo.bar@baz.zap>
      ;; those aren't hacked yet.
      (when (and (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037 ()<>@,;:\\\"]+\\)" t)
                 (or (eobp)
                     (rfc822-looking-at ?,)))
        ;; rfc822-looking-at may have inserted a space
        (or (bobp) (/= (preceding-char) ?\s) (delete-char -1))
        ;; relying on the fact that rfc822-looking-at <char>
        ;; doesn't mung match-data
        (throw 'address (buffer-substring rfc822-address-start (match-end 0))))
      ;; The fast path did not apply; start over with the full parser.
      (goto-char rfc822-address-start)
      ;; Every pass through this loop either reads more words or leaves
      ;; the function through a throw to `address'.
      (while t
        (cond ((and (= n 1) (rfc822-looking-at ?@))
               ;; local-part@domain
               (rfc822-snarf-domain)
               (throw 'address
                      (buffer-substring rfc822-address-start (point))))
              ((rfc822-looking-at ?:)
               (cond ((not allow-groups)
                      (rfc822-bad-address "A group name may not appear here"))
                     ((= n 0)
                      (rfc822-bad-address "No name for :...; group")))
               ;; group
               (throw 'address
                      ;; return a list of addresses
                      (rfc822-snarf-frob-list ":...; group" ?\, ?\;
                                              'rfc822-addresses-1 t)))
              ((rfc822-looking-at ?<)
               ;; START is just after the "<".  STRIP says whether the
               ;; angle brackets are left out of the returned text; they
               ;; are kept only when a route is present.
               (let ((start (point))
                     (strip t))
                 (cond ((rfc822-looking-at ?>)
                        ;; empty path
                        ;; NOTE(review): this consumes the ">", so the
                        ;; test for a closing ">" further down fails
                        ;; unless a second ">" follows -- confirm whether
                        ;; "<>" is meant to be accepted.
                        ())
                       ((and (not (eobp)) (= (following-char) ?\@))
                        ;; <@foo.bar,@baz:quux@abcd.efg>
                        (rfc822-snarf-frob-list "<...> address" ?\, ?\:
                                                (lambda ()
                                                  (if (rfc822-looking-at ?\@)
                                                      (rfc822-snarf-domain)
                                                    (rfc822-bad-address
                                                     "Gubbish in route-addr"))))
                        (rfc822-snarf-words)
                        (or (rfc822-looking-at ?@)
                            (rfc822-bad-address "Malformed <..@..> address"))
                        (rfc822-snarf-domain)
                        (setq strip nil))
                       ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
                        ;; allow <foo> (losing unix seems to do this)
                        (rfc822-snarf-domain)))
                 (let ((end (point)))
                   (if (rfc822-looking-at ?\>)
                       (throw 'address
                              (buffer-substring (if strip start (1- start))
                                                (if strip end (1+ end))))
                     (rfc822-bad-address "Unterminated <...> address")))))
              ((looking-at "[^][\000-\037 ()<>@,;:\\.]")
               ;; this allows "." to be part of the words preceding
               ;; an addr-spec, since many broken mailers output
               ;; "Hern K. Herklemeyer III
               ;;   <yank@megadeath.dod.gods-own-country>"
               (let ((again t))
                 (while again
                   ;; Keep consecutive words separated by one space.
                   (or (= n 0) (bobp) (= (preceding-char) ?\s)
                       (insert ?\s))
                   (rfc822-snarf-words)
                   (setq n (1+ n))
                   (setq again (or (rfc822-looking-at ?.)
                                   (looking-at "[^][\000-\037 ()<>@,;:\\.]"))))))
              ((= n 0)
               (throw 'address nil))
              ((= n 1) ; allow "foo" (losing unix seems to do this)
               (throw 'address
                      (buffer-substring rfc822-address-start (point))))
              (t
               ;; Here n > 1: several words were read but neither "<",
               ;; ":" nor anything else usable follows them.  (Two
               ;; further clauses used to follow an explicit (> n 1)
               ;; test; they could never be reached and were dropped.)
               (rfc822-bad-address "Missing comma between addresses or badly-formatted address")))))))
274 | ||
a1506d29 | 275 | |
cedaf3aa | 276 | (defun rfc822-addresses (header-text) |
55872f3e | 277 | (if (string-match "\\`[ \t]*\\([^][\000-\037 ()<>@,;:\\\".]+\\)[ \t]*\\'" |
cedaf3aa RS |
278 | header-text) |
279 | ;; Make very simple case moderately fast. | |
280 | (list (substring header-text (match-beginning 1) (match-end 1))) | |
281 | (let ((buf (generate-new-buffer " rfc822"))) | |
282 | (unwind-protect | |
283 | (save-excursion | |
284 | (set-buffer buf) | |
285 | (make-local-variable 'case-fold-search) | |
286 | (setq case-fold-search nil) ;For speed(?) | |
287 | (insert header-text) | |
288 | ;; unfold continuation lines | |
289 | (goto-char (point-min)) | |
290 | ||
291 | (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) | |
292 | (replace-match "\\1 " t)) | |
293 | ||
294 | (goto-char (point-min)) | |
cedaf3aa RS |
295 | (let ((list ()) |
296 | tem | |
d6b4d749 RS |
297 | rfc822-address-start); this is for rfc822-bad-address |
298 | (rfc822-nuke-whitespace) | |
cedaf3aa | 299 | (while (not (eobp)) |
d6b4d749 | 300 | (setq rfc822-address-start (point)) |
cedaf3aa RS |
301 | (setq tem |
302 | (catch 'address ; this is for rfc822-bad-address | |
303 | (cond ((rfc822-looking-at ?\,) | |
304 | nil) | |
55872f3e | 305 | ((looking-at "[][\000-\037@;:\\.>)]") |
cedaf3aa RS |
306 | (forward-char) |
307 | (rfc822-bad-address | |
308 | (format "Strange character \\%c found" | |
309 | (preceding-char)))) | |
310 | (t | |
311 | (rfc822-addresses-1 t))))) | |
312 | (cond ((null tem)) | |
313 | ((stringp tem) | |
314 | (setq list (cons tem list))) | |
315 | (t | |
316 | (setq list (nconc (nreverse tem) list))))) | |
317 | (nreverse list))) | |
318 | (and buf (kill-buffer buf)))))) | |
319 | ||
49116ac0 JB |
320 | (provide 'rfc822) |
321 | ||
cbee283d | 322 | ;; arch-tag: 5d388a24-e173-40fb-9b8e-85269de44b37 |
c88ab9ce | 323 | ;;; rfc822.el ends here |