*** empty log message ***
[bpt/emacs.git] / lisp / mail / rfc822.el
CommitLineData
;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike

;; Author: Richard Mlynarik <mly@eddie.mit.edu>
;; Maintainer: FSF
;; Last-Modified: 26 Nov 1990
;; Keywords: mail

;; Copyright (C) 1986, 87, 1990 Free Software Foundation, Inc.

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to
;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

;;; Code:

;; Reads the variable `address-start' free; always exits by throwing to
;; the tag `address'.
(defun rfc822-bad-address (reason)
  "Rewrite the address being parsed as an error comment and throw it.
A `_^_' marker is inserted at point.  The text from `address-start' up
to the next comma or semicolon (or the end of the buffer) then has its
backslashes, parentheses and newlines backslash-quoted and is wrapped in
a parenthesized note that includes the string REASON.  After skipping
any following whitespace, the buffer text from `address-start' to point
is thrown to the tag `address'."
  (save-restriction
    (insert "_^_")
    (let ((limit (if (re-search-forward "[,;]" nil t)
                     (max (point-min) (1- (point)))
                   (point-max))))
      (narrow-to-region address-start limit))
    ;; Quote everything that would be special inside a (...) comment.
    (dolist (special '("\\" "(" ")" "\n"))
      (goto-char (point-min))
      (while (search-forward special nil t)
        (backward-char 1)
        (insert ?\\)
        (forward-char 1)))
    (goto-char (point-min))
    (insert "(Unparsable address -- " reason ":\n\t \"")
    (goto-char (point-max))
    (insert "\")"))
  (rfc822-nuke-whitespace)
  (throw 'address (buffer-substring address-start (point))))
51
(defun rfc822-nuke-whitespace (&optional leave-space)
  "Delete whitespace and parenthesized comments starting at point.
Spaces, tabs and newlines are removed, and so are (...) comments,
including nested comments and backslash-quoted characters inside them.
An unclosed comment, or a backslash that is the last character of the
buffer, is reported through `rfc822-bad-address'.
If LEAVE-SPACE is non-nil, finish by inserting one space at point unless
point is at either end of the buffer or already follows a space."
  (let (ch)
    (while (and (not (eobp))
                (progn
                  (setq ch (following-char))
                  (cond
                   ((= ch ?\()
                    ;; Step inside the comment and eat its contents.
                    (forward-char 1)
                    (while (cond ((eobp)
                                  (rfc822-bad-address "Unbalanced comment (...)"))
                                 (t
                                  (/= (setq ch (following-char)) ?\))))
                      (cond ((looking-at "[^()\\]+")
                             ;; ordinary comment text
                             (replace-match ""))
                            ((= ch ?\()
                             ;; nested comment
                             (rfc822-nuke-whitespace))
                            ((< (point) (1- (point-max)))
                             ;; a backslash and the character it quotes
                             (delete-char 2))
                            (t
                             (rfc822-bad-address "orphaned backslash"))))
                    ;; Only the emptied "()" is left; remove it.
                    (forward-char -1)
                    (delete-char 2)
                    t)
                   ((memq ch '(?\s ?\t ?\n))
                    (let ((from (point)))
                      (skip-chars-forward " \t\n")
                      (delete-region from (point)))
                    t)))))
    (if (and leave-space
             (not (eobp))
             (not (bobp))
             (/= (preceding-char) ?\s))
        (insert ?\s)
      t)))
84
(defun rfc822-looking-at (regex &optional leave-space)
  "Consume the text at point if it matches REGEX.
REGEX is either a regexp string, tested with `looking-at', or a
character, compared with the character after point.  On a match, move
past the matched text, call `rfc822-nuke-whitespace' with LEAVE-SPACE,
put back the match data that was current before that call, and return
t.  Otherwise return nil and leave point alone."
  (let ((matched
         (if (stringp regex)
             (and (looking-at regex)
                  (progn (goto-char (match-end 0))
                         t))
           (and (not (eobp))
                (= (following-char) regex)
                (progn (forward-char 1)
                       t)))))
    (if matched
        (let ((saved (match-data)))
          (rfc822-nuke-whitespace leave-space)
          (store-match-data saved)
          t))))
99
(defun rfc822-snarf-word ()
  "Move over one rfc822 word (an atom or a quoted-string) at point.
Anything else, or a quoted string with no closing quote, is reported
through `rfc822-bad-address'."
  (if (= (following-char) ?\")
      ;; quoted-string
      (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
          (rfc822-bad-address "Unterminated quoted string"))
    ;; atom
    (or (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in address"))))
111
(defun rfc822-snarf-words ()
  "Move over a sequence of words separated by dots.
Calls `rfc822-snarf-word' once, then again after every `.' consumed by
`rfc822-looking-at'."
  (let ((more t))
    (while more
      (rfc822-snarf-word)
      (setq more (rfc822-looking-at ?.)))))
116
(defun rfc822-snarf-subdomain ()
  "Move over one rfc822 sub-domain at point.
A sub-domain is either a domain-literal, written in square brackets, or
a domain-ref, which is an atom.  Anything else, or a bracketed literal
with no closing bracket, is reported through `rfc822-bad-address'."
  ;; sub-domain is domain-ref | domain-literal
  (if (= (following-char) ?\[)
      ;; domain-literal = "[" *(dtext | quoted-pair) "]"
      (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
          (rfc822-bad-address "Unterminated domain literal [...]"))
    ;; domain-ref = atom
    (or (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in host/domain specification"))))
128
(defun rfc822-snarf-domain ()
  "Move over a domain: one or more sub-domains separated by dots.
Calls `rfc822-snarf-subdomain' once, then again after every `.' consumed
by `rfc822-looking-at'."
  (let ((more t))
    (while more
      (rfc822-snarf-subdomain)
      (setq more (rfc822-looking-at ?.)))))
133
(defun rfc822-snarf-frob-list (name separator terminator snarfer
                                    &optional return)
  "Parse a SEPARATOR-delimited list that ends with TERMINATOR.
SEPARATOR and TERMINATOR are handed to `rfc822-looking-at'.  SNARFER is
called with no arguments to parse each element.  NAME is a description
of the construct, used only in the messages passed to
`rfc822-bad-address' when the buffer ends too early or an element is
not preceded by a separator.
If RETURN is non-nil, the non-nil values returned by SNARFER are
collected in order, with list values spliced in, and that list is
returned.  Otherwise the result is nil."
  (let ((at-start t)
        (found nil)
        item)
    (while (cond ((eobp)
                  (rfc822-bad-address
                   (format "End of addresses in middle of %s" name)))
                 ((rfc822-looking-at terminator)
                  nil)
                 ((rfc822-looking-at separator)
                  ;; multiple separators are allowed and do nothing.
                  (while (rfc822-looking-at separator))
                  t)
                 ;; The first element needs no separator before it.
                 (at-start
                  t)
                 (t
                  (rfc822-bad-address
                   (format "Gubbish in middle of %s" name))))
      (setq item (funcall snarfer))
      (setq at-start nil)
      (when (and return item)
        ;; FOUND is kept reversed until the very end.
        (setq found (if (listp item)
                        (nconc (reverse item) found)
                      (cons item found)))))
    (nreverse found)))
160
;; return either an address (a string) or a list of addresses
(defun rfc822-addresses-1 (&optional allow-groups)
  "Parse one rfc822 address starting at point and return it.
The value is a string for a single mailbox, a list of strings for a
group, or nil when no word, `:' or `<' is found at point.  Group syntax
is only accepted when ALLOW-GROUPS is non-nil.
Malformed input is handed to `rfc822-bad-address'; its throw to the tag
`address' is caught here, so the annotated text it builds becomes the
return value.  `address-start' is bound to point on entry because
`rfc822-bad-address' reads it."
  ;; Looking for an rfc822 `address'
  ;; Either a group (1*word ":" [#mailbox] ";")
  ;; or a mailbox (addr-spec | 1*word route-addr)
  ;;  addr-spec is (local-part "@" domain)
  ;;  route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
  ;;  local-part is (word *("." word))
  ;;  word is (atom | quoted-string)
  ;;  quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
  ;;  atom is [^\000-\037\177 ()<>@,;:\".[]]+
  ;;  domain is sub-domain *("." sub-domain)
  ;;  sub-domain is domain-ref | domain-literal
  ;;  domain-literal is "[" *(dtext | quoted-pair) "]"
  ;;  dtext is "[^][\\n"
  ;;  domain-ref is atom
  (let ((address-start (point))
        ;; N counts the word sequences consumed so far by the loop below.
        (n 0))
    (catch 'address
      ;; optimize common cases:
      ;;  foo
      ;;  foo.bar@bar.zap
      ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
      ;; other common cases are:
      ;;  foo bar <foo.bar@baz.zap>
      ;;  "foo bar" <foo.bar@baz.zap>
      ;; those aren't hacked yet.
      (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t)
               (or (eobp)
                   (rfc822-looking-at ?,)))
          (progn
            ;; rfc822-looking-at may have inserted a space
            (or (bobp) (/= (preceding-char) ?\s) (delete-char -1))
            ;; relying on the fact that rfc822-looking-at <char>
            ;; doesn't mung match-data
            (throw 'address (buffer-substring address-start (match-end 0)))))
      ;; The fast path did not apply; start over with the full parser.
      (goto-char address-start)
      (while t
        (cond ((and (= n 1) (rfc822-looking-at ?@))
               ;; local-part@domain
               (rfc822-snarf-domain)
               (throw 'address
                      (buffer-substring address-start (point))))
              ((rfc822-looking-at ?:)
               (cond ((not allow-groups)
                      (rfc822-bad-address "A group name may not appear here"))
                     ((= n 0)
                      (rfc822-bad-address "No name for :...; group")))
               ;; group
               (throw 'address
                      ;; return a list of addresses
                      (rfc822-snarf-frob-list ":...; group" ?\, ?\;
                                              'rfc822-addresses-1 t)))
              ((rfc822-looking-at ?<)
               ;; START is just after the "<".  STRIP says whether the
               ;; angle brackets are left out of the returned text.
               (let ((start (point))
                     (strip t))
                 (cond ((rfc822-looking-at ?>)
                        ;; empty path
                        ;; NOTE(review): this test has already consumed
                        ;; the ">", so the terminator check below will
                        ;; not find it and "<>" looks like it ends up
                        ;; reported as unterminated -- confirm intent.
                        ())
                       ((and (not (eobp)) (= (following-char) ?\@))
                        ;; <@foo.bar,@baz:quux@abcd.efg>
                        (rfc822-snarf-frob-list "<...> address" ?\, ?\:
                          (function (lambda ()
                                      (if (rfc822-looking-at ?\@)
                                          (rfc822-snarf-domain)
                                        (rfc822-bad-address
                                         "Gubbish in route-addr")))))
                        (rfc822-snarf-words)
                        (or (rfc822-looking-at ?@)
                            (rfc822-bad-address "Malformed <..@..> address"))
                        (rfc822-snarf-domain)
                        ;; A routed address keeps its angle brackets.
                        (setq strip nil))
                       ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
                        ;; allow <foo> (losing unix seems to do this)
                        (rfc822-snarf-domain)))
                 (let ((end (point)))
                   (if (rfc822-looking-at ?\>)
                       (throw 'address
                              (buffer-substring (if strip start (1- start))
                                                (if strip end (1+ end))))
                     (rfc822-bad-address "Unterminated <...> address")))))
              ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
               ;; this allows "." to be part of the words preceding
               ;; an addr-spec, since many broken mailers output
               ;; "Hern K. Herklemeyer III
               ;;   <yank@megadeath.dod.gods-own-country>"
               (let ((again t))
                 (while again
                   ;; Keep successive word sequences separated by a
                   ;; single space in the buffer.
                   (or (= n 0) (bobp) (= (preceding-char) ?\s)
                       (insert ?\s))
                   (rfc822-snarf-words)
                   (setq n (1+ n))
                   (setq again (or (rfc822-looking-at ?.)
                                   (looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]"))))))
              ((= n 0)
               (throw 'address nil))
              ((= n 1) ; allow "foo" (losing unix seems to do this)
               (throw 'address
                      (buffer-substring address-start (point))))
              ;; N starts at 0 and is only ever incremented, so this
              ;; clause covers every remaining case.
              ((> n 1)
               (rfc822-bad-address "Missing comma between addresses or badly-formatted address")))))))
266
267
(defun rfc822-addresses (header-text)
  "Return a list of the addresses found in the string HEADER-TEXT.
A header that is just one atom, optionally surrounded by spaces or tabs,
is returned directly as a one-element list.  Anything else is copied
into a temporary buffer, continuation lines are unfolded, and the
addresses are parsed one at a time with `rfc822-addresses-1'.
Text that cannot be parsed appears in the result as the annotated
string built by `rfc822-bad-address'."
  (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
                    header-text)
      ;; Make very simple case moderately fast.
      (list (substring header-text (match-beginning 1) (match-end 1)))
    (let ((buf (generate-new-buffer " rfc822")))
      (unwind-protect
          (with-current-buffer buf
            (make-local-variable 'case-fold-search)
            (setq case-fold-search nil) ;For speed(?)
            (insert header-text)
            ;; unfold continuation lines
            (goto-char (point-min))

            (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
              (replace-match "\\1 " t))

            (goto-char (point-min))
            (let ((list ())
                  tem
                  ;; this is for rfc822-bad-address; it must already be
                  ;; bound when the leading whitespace is skipped below.
                  (address-start (point)))
              ;; Skipping leading whitespace and comments can itself
              ;; hit an unbalanced comment, in which case
              ;; rfc822-bad-address throws to `address'.  Catch that
              ;; here and record the thrown text like any other result.
              (setq tem (catch 'address
                          (rfc822-nuke-whitespace)
                          nil))
              (if tem
                  (setq list (cons tem list)))
              (while (not (eobp))
                (setq address-start (point))
                (setq tem
                      (catch 'address ; this is for rfc822-bad-address
                        (cond ((rfc822-looking-at ?\,)
                               nil)
                              ((looking-at "[][\000-\037\177-\377@;:\\.>)]")
                               (forward-char)
                               (rfc822-bad-address
                                (format "Strange character \\%c found"
                                        (preceding-char))))
                              (t
                               (rfc822-addresses-1 t)))))
                ;; TEM is nil, a single address, or a list of addresses
                ;; from a group.  LIST is kept reversed until the end.
                (cond ((null tem))
                      ((stringp tem)
                       (setq list (cons tem list)))
                      (t
                       (setq list (nconc (nreverse tem) list)))))
              (nreverse list)))
        (and buf (kill-buffer buf))))))
311
49116ac0
JB
;; Announce the feature so that `(require 'rfc822)' is satisfied.
(provide 'rfc822)

;;; rfc822.el ends here