| 1 | ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike |
| 2 | |
| 3 | ;; Copyright (C) 1986-1987, 1990, 2001-2011 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Richard Mlynarik <mly@eddie.mit.edu> |
| 6 | ;; Maintainer: FSF |
| 7 | ;; Keywords: mail |
| 8 | |
| 9 | ;; This file is part of GNU Emacs. |
| 10 | |
| 11 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 12 | ;; it under the terms of the GNU General Public License as published by |
| 13 | ;; the Free Software Foundation, either version 3 of the License, or |
| 14 | ;; (at your option) any later version. |
| 15 | |
| 16 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 19 | ;; GNU General Public License for more details. |
| 20 | |
| 21 | ;; You should have received a copy of the GNU General Public License |
| 22 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 23 | |
| 24 | ;;; Commentary: |
| 25 | |
| 26 | ;; Support functions for parsing RFC-822 headers, used by mail and news |
| 27 | ;; modes. |
| 28 | |
| 29 | ;;; Code: |
| 30 | |
| 31 | (defvar rfc822-address-start) |
| 32 | |
| 33 | ;; uses rfc822-address-start free, throws to address |
| 34 | (defun rfc822-bad-address (reason) |
| 35 | (save-restriction |
| 36 | (insert "_^_") |
| 37 | (narrow-to-region rfc822-address-start |
| 38 | (if (re-search-forward "[,;]" nil t) |
| 39 | (max (point-min) (1- (point))) |
| 40 | (point-max))) |
| 41 | ;; make the error string be suitable for inclusion in (...) |
| 42 | (let ((losers '("\\" "(" ")" "\n"))) |
| 43 | (while losers |
| 44 | (goto-char (point-min)) |
| 45 | (while (search-forward (car losers) nil t) |
| 46 | (backward-char 1) |
| 47 | (insert ?\\) |
| 48 | (forward-char 1)) |
| 49 | (setq losers (cdr losers)))) |
| 50 | (goto-char (point-min)) (insert "(Unparsable address -- " |
| 51 | reason |
| 52 | ": \"") |
| 53 | (goto-char (point-max)) (insert "\")")) |
| 54 | (rfc822-nuke-whitespace) |
| 55 | (throw 'address (buffer-substring rfc822-address-start (point)))) |
| 56 | |
| 57 | (defun rfc822-nuke-whitespace (&optional leave-space) |
| 58 | (let (ch) |
| 59 | (while (cond ((eobp) |
| 60 | nil) |
| 61 | ((= (setq ch (following-char)) ?\() |
| 62 | (forward-char 1) |
| 63 | (while (if (eobp) |
| 64 | (rfc822-bad-address "Unbalanced comment (...)") |
| 65 | (/= (setq ch (following-char)) ?\))) |
| 66 | (cond ((looking-at "[^()\\]+") |
| 67 | (replace-match "")) |
| 68 | ((= ch ?\() |
| 69 | (rfc822-nuke-whitespace)) |
| 70 | ((< (point) (1- (point-max))) |
| 71 | (delete-char 2)) |
| 72 | (t |
| 73 | (rfc822-bad-address "orphaned backslash")))) |
| 74 | ;; delete remaining "()" |
| 75 | (forward-char -1) |
| 76 | (delete-char 2) |
| 77 | t) |
| 78 | ((memq ch '(?\ ?\t ?\n)) |
| 79 | (delete-region (point) |
| 80 | (progn (skip-chars-forward " \t\n") (point))) |
| 81 | t) |
| 82 | (t |
| 83 | nil))) |
| 84 | (or (not leave-space) |
| 85 | (eobp) |
| 86 | (bobp) |
| 87 | (= (preceding-char) ?\ ) |
| 88 | (insert ?\ )))) |
| 89 | |
| 90 | (defun rfc822-looking-at (regex &optional leave-space) |
| 91 | (if (cond ((stringp regex) |
| 92 | (if (looking-at regex) |
| 93 | (progn (goto-char (match-end 0)) |
| 94 | t))) |
| 95 | (t |
| 96 | (if (and (not (eobp)) |
| 97 | (= (following-char) regex)) |
| 98 | (progn (forward-char 1) |
| 99 | t)))) |
| 100 | (let ((tem (match-data))) |
| 101 | (rfc822-nuke-whitespace leave-space) |
| 102 | (set-match-data tem) |
| 103 | t))) |
| 104 | |
| 105 | (defun rfc822-snarf-word () |
| 106 | ;; word is atom | quoted-string |
| 107 | (cond ((= (following-char) ?\") |
| 108 | ;; quoted-string |
| 109 | (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"") |
| 110 | (rfc822-bad-address "Unterminated quoted string"))) |
| 111 | ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+") |
| 112 | ;; atom |
| 113 | ) |
| 114 | (t |
| 115 | (rfc822-bad-address "Rubbish in address")))) |
| 116 | |
| 117 | (defun rfc822-snarf-words () |
| 118 | (rfc822-snarf-word) |
| 119 | (while (rfc822-looking-at ?.) |
| 120 | (rfc822-snarf-word))) |
| 121 | |
| 122 | (defun rfc822-snarf-subdomain () |
| 123 | ;; sub-domain is domain-ref | domain-literal |
| 124 | (cond ((= (following-char) ?\[) |
| 125 | ;; domain-ref |
| 126 | (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]") |
| 127 | (rfc822-bad-address "Unterminated domain literal [...]"))) |
| 128 | ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+") |
| 129 | ;; domain-literal = atom |
| 130 | ) |
| 131 | (t |
| 132 | (rfc822-bad-address "Rubbish in host/domain specification")))) |
| 133 | |
| 134 | (defun rfc822-snarf-domain () |
| 135 | (rfc822-snarf-subdomain) |
| 136 | (while (rfc822-looking-at ?.) |
| 137 | (rfc822-snarf-subdomain))) |
| 138 | |
| 139 | (defun rfc822-snarf-frob-list (name separator terminator snarfer |
| 140 | &optional return) |
| 141 | (let ((first t) |
| 142 | (list ()) |
| 143 | tem) |
| 144 | (while (cond ((eobp) |
| 145 | (rfc822-bad-address |
| 146 | (format "End of addresses in middle of %s" name))) |
| 147 | ((rfc822-looking-at terminator) |
| 148 | nil) |
| 149 | ((rfc822-looking-at separator) |
| 150 | ;; multiple separators are allowed and do nothing. |
| 151 | (while (rfc822-looking-at separator)) |
| 152 | t) |
| 153 | (first |
| 154 | t) |
| 155 | (t |
| 156 | (rfc822-bad-address |
| 157 | (format "Gubbish in middle of %s" name)))) |
| 158 | (setq tem (funcall snarfer) |
| 159 | first nil) |
| 160 | (and return tem |
| 161 | (setq list (if (listp tem) |
| 162 | (nconc (reverse tem) list) |
| 163 | (cons tem list))))) |
| 164 | (nreverse list))) |
| 165 | |
| 166 | ;; return either an address (a string) or a list of addresses |
| 167 | (defun rfc822-addresses-1 (&optional allow-groups) |
| 168 | ;; Looking for an rfc822 `address' |
| 169 | ;; Either a group (1*word ":" [#mailbox] ";") |
| 170 | ;; or a mailbox (addr-spec | 1*word route-addr) |
| 171 | ;; addr-spec is (local-part "@" domain) |
| 172 | ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">") |
| 173 | ;; local-part is (word *("." word)) |
| 174 | ;; word is (atom | quoted-string) |
| 175 | ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)") |
| 176 | ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+ |
| 177 | ;; domain is sub-domain *("." sub-domain) |
| 178 | ;; sub-domain is domain-ref | domain-literal |
| 179 | ;; domain-literal is "[" *(dtext | quoted-pair) "]" |
| 180 | ;; dtext is "[^][\\n" |
| 181 | ;; domain-ref is atom |
| 182 | (let ((rfc822-address-start (point)) |
| 183 | (n 0)) |
| 184 | (catch 'address |
| 185 | ;; optimize common cases: |
| 186 | ;; foo |
| 187 | ;; foo.bar@bar.zap |
| 188 | ;; followed by "\\'\\|,\\|([^()\\]*)\\'" |
| 189 | ;; other common cases are: |
| 190 | ;; foo bar <foo.bar@baz.zap> |
| 191 | ;; "foo bar" <foo.bar@baz.zap> |
| 192 | ;; those aren't hacked yet. |
| 193 | (if (and (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037 ()<>@,;:\\\"]+\\)" t) |
| 194 | (progn (or (eobp) |
| 195 | (rfc822-looking-at ?,)))) |
| 196 | (progn |
| 197 | ;; rfc822-looking-at may have inserted a space |
| 198 | (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1)) |
| 199 | ;; relying on the fact that rfc822-looking-at <char> |
| 200 | ;; doesn't mung match-data |
| 201 | (throw 'address (buffer-substring rfc822-address-start (match-end 0))))) |
| 202 | (goto-char rfc822-address-start) |
| 203 | (while t |
| 204 | (cond ((and (= n 1) (rfc822-looking-at ?@)) |
| 205 | ;; local-part@domain |
| 206 | (rfc822-snarf-domain) |
| 207 | (throw 'address |
| 208 | (buffer-substring rfc822-address-start (point)))) |
| 209 | ((rfc822-looking-at ?:) |
| 210 | (cond ((not allow-groups) |
| 211 | (rfc822-bad-address "A group name may not appear here")) |
| 212 | ((= n 0) |
| 213 | (rfc822-bad-address "No name for :...; group"))) |
| 214 | ;; group |
| 215 | (throw 'address |
| 216 | ;; return a list of addresses |
| 217 | (rfc822-snarf-frob-list ":...; group" ?\, ?\; |
| 218 | 'rfc822-addresses-1 t))) |
| 219 | ((rfc822-looking-at ?<) |
| 220 | (let ((start (point)) |
| 221 | (strip t)) |
| 222 | (cond ((rfc822-looking-at ?>) |
| 223 | ;; empty path |
| 224 | ()) |
| 225 | ((and (not (eobp)) (= (following-char) ?\@)) |
| 226 | ;; <@foo.bar,@baz:quux@abcd.efg> |
| 227 | (rfc822-snarf-frob-list "<...> address" ?\, ?\: |
| 228 | (function (lambda () |
| 229 | (if (rfc822-looking-at ?\@) |
| 230 | (rfc822-snarf-domain) |
| 231 | (rfc822-bad-address |
| 232 | "Gubbish in route-addr"))))) |
| 233 | (rfc822-snarf-words) |
| 234 | (or (rfc822-looking-at ?@) |
| 235 | (rfc822-bad-address "Malformed <..@..> address")) |
| 236 | (rfc822-snarf-domain) |
| 237 | (setq strip nil)) |
| 238 | ((progn (rfc822-snarf-words) (rfc822-looking-at ?@)) |
| 239 | ; allow <foo> (losing unix seems to do this) |
| 240 | (rfc822-snarf-domain))) |
| 241 | (let ((end (point))) |
| 242 | (if (rfc822-looking-at ?\>) |
| 243 | (throw 'address |
| 244 | (buffer-substring (if strip start (1- start)) |
| 245 | (if strip end (1+ end)))) |
| 246 | (rfc822-bad-address "Unterminated <...> address"))))) |
| 247 | ((looking-at "[^][\000-\037 ()<>@,;:\\.]") |
| 248 | ;; this allows "." to be part of the words preceding |
| 249 | ;; an addr-spec, since many broken mailers output |
| 250 | ;; "Hern K. Herklemeyer III |
| 251 | ;; <yank@megadeath.dod.gods-own-country>" |
| 252 | (let ((again t)) |
| 253 | (while again |
| 254 | (or (= n 0) (bobp) (= (preceding-char) ?\ ) |
| 255 | (insert ?\ )) |
| 256 | (rfc822-snarf-words) |
| 257 | (setq n (1+ n)) |
| 258 | (setq again (or (rfc822-looking-at ?.) |
| 259 | (looking-at "[^][\000-\037 ()<>@,;:\\.]")))))) |
| 260 | ((= n 0) |
| 261 | (throw 'address nil)) |
| 262 | ((= n 1) ; allow "foo" (losing unix seems to do this) |
| 263 | (throw 'address |
| 264 | (buffer-substring rfc822-address-start (point)))) |
| 265 | ((> n 1) |
| 266 | (rfc822-bad-address "Missing comma between addresses or badly-formatted address")) |
| 267 | ((or (eobp) (= (following-char) ?,)) |
| 268 | (rfc822-bad-address "Missing comma or route-spec")) |
| 269 | (t |
| 270 | (rfc822-bad-address "Strange character or missing comma"))))))) |
| 271 | |
| 272 | |
| 273 | (defun rfc822-addresses (header-text) |
| 274 | (if (string-match "\\`[ \t]*\\([^][\000-\037 ()<>@,;:\\\".]+\\)[ \t]*\\'" |
| 275 | header-text) |
| 276 | ;; Make very simple case moderately fast. |
| 277 | (list (substring header-text (match-beginning 1) (match-end 1))) |
| 278 | (let ((buf (generate-new-buffer " rfc822"))) |
| 279 | (unwind-protect |
| 280 | (with-current-buffer buf |
| 281 | (make-local-variable 'case-fold-search) |
| 282 | (setq case-fold-search nil) ;For speed(?) |
| 283 | (insert header-text) |
| 284 | ;; unfold continuation lines |
| 285 | (goto-char (point-min)) |
| 286 | |
| 287 | (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" |
| 288 | nil t) |
| 289 | (replace-match "\\1 " t)) |
| 290 | |
| 291 | (goto-char (point-min)) |
| 292 | ;; Give `rfc822-address-start' a non-nil initial value to |
| 293 | ;; prevent `rfc822-bad-address' from raising a |
| 294 | ;; `wrong-type-argument' error. |
| 295 | (let* ((rfc822-address-start (point)) |
| 296 | list tem |
| 297 | (err |
| 298 | (catch 'address |
| 299 | ;; Note that `rfc822-nuke-whitespace' and |
| 300 | ;; `rfc822-looking-at' can throw. |
| 301 | (rfc822-nuke-whitespace) |
| 302 | (while (not (eobp)) |
| 303 | (setq rfc822-address-start (point)) |
| 304 | (setq tem |
| 305 | (cond ((rfc822-looking-at ?\,) |
| 306 | nil) |
| 307 | ((looking-at "[][\000-\037@;:\\.>)]") |
| 308 | (forward-char) |
| 309 | (catch 'address ; For rfc822-bad-address |
| 310 | (rfc822-bad-address |
| 311 | (format "Strange character \\%c found" |
| 312 | (preceding-char))))) |
| 313 | (t |
| 314 | (rfc822-addresses-1 t)))) |
| 315 | (cond ((null tem)) |
| 316 | ((stringp tem) |
| 317 | (setq list (cons tem list))) |
| 318 | (t |
| 319 | (setq list (nconc (nreverse tem) list))))) |
| 320 | nil))) |
| 321 | (nreverse (append (if err (list err)) list)))) |
| 322 | (and buf (kill-buffer buf)))))) |
| 323 | |
| 324 | (provide 'rfc822) |
| 325 | |
| 326 | ;;; rfc822.el ends here |