* src/eval.c (Fbind_symbol): New function.
[bpt/emacs.git] / lisp / mail / mail-extr.el
CommitLineData
b1ccc1ce 1;;; mail-extr.el --- extract full name and address from RFC 822 mail header -*- coding: utf-8 -*-
72c0ae01 2
ba318903 3;; Copyright (C) 1991-1994, 1997, 2001-2014 Free Software Foundation,
ab422c4d 4;; Inc.
3a801d0c 5
72c0ae01 6;; Author: Joe Wells <jbw@cs.bu.edu>
34dc21db 7;; Maintainer: emacs-devel@gnu.org
72c0ae01 8;; Keywords: mail
bd78fa1d 9;; Package: mail-utils
72c0ae01 10
72c0ae01
ER
11;; This file is part of GNU Emacs.
12
b1fc2b50 13;; GNU Emacs is free software: you can redistribute it and/or modify
72c0ae01 14;; it under the terms of the GNU General Public License as published by
b1fc2b50
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
72c0ae01
ER
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
b1fc2b50 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
72c0ae01
ER
25
26;;; Commentary:
27
154b3e39
RS
28;; The entry point of this code is
29;;
d1782bd8 30;; mail-extract-address-components: (address &optional all)
6c83d99f 31;;
154b3e39
RS
32;; Given an RFC-822 ADDRESS, extract full name and canonical address.
33;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
34;; If no name can be extracted, FULL-NAME will be nil.
6c83d99f 35;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
154b3e39
RS
36;; (narrowed) portion of the buffer will be interpreted as the address.
37;; (This feature exists so that the clever caller might be able to avoid
38;; consing a string.)
39;; If ADDRESS contains more than one RFC-822 address, only the first is
40;; returned.
41;;
d1782bd8
KH
42;; If ALL is non-nil, that means return info about all the addresses
43;; that are found in ADDRESS. The value is a list of elements of
44;; the form (FULL-NAME CANONICAL-ADDRESS), one per address.
45;;
154b3e39
RS
46;; This code is more correct (and more heuristic) parser than the code in
47;; rfc822.el. And despite its size, it's fairly fast.
48;;
72c0ae01 49;; There are two main benefits:
154b3e39 50;;
72c0ae01 51;; 1. Higher probability of getting the correct full name for a human than
154b3e39 52;; any other package we know of. (On the other hand, it will cheerfully
72c0ae01
ER
53;; mangle non-human names/comments.)
54;; 2. Address part is put in a canonical form.
154b3e39
RS
55;;
56;; The interface is not yet carved in stone; please give us suggestions.
57;;
58;; We have an extensive test-case collection of funny addresses if you want to
72c0ae01 59;; work with the code. Developing this code requires frequent testing to
154b3e39
RS
60;; make sure you're not breaking functionality. The test cases aren't included
61;; because they are over 100K.
62;;
6c83d99f 63;; If you find an address that mail-extr fails on, please send it to the
154b3e39
RS
64;; maintainer along with what you think the correct results should be. We do
65;; not consider it a bug if mail-extr mangles a comment that does not
6c83d99f 66;; correspond to a real human full name, although we would prefer that
154b3e39
RS
67;; mail-extr would return the comment as-is.
68;;
72c0ae01 69;; Features:
154b3e39 70;;
72c0ae01 71;; * Full name handling:
154b3e39 72;;
72c0ae01
ER
73;; * knows where full names can be found in an address.
74;; * avoids using empty comments and quoted text.
75;; * extracts full names from mailbox names.
76;; * recognizes common formats for comments after a full name.
77;; * puts a period and a space after each initial.
154b3e39
RS
78;; * understands & referring to the mailbox name, capitalized.
79;; * strips name prefixes like "Prof.", etc.
72c0ae01
ER
80;; * understands what characters can occur in names (not just letters).
81;; * figures out middle initial from mailbox name.
82;; * removes funny nicknames.
83;; * keeps suffixes such as Jr., Sr., III, etc.
84;; * reorders "Last, First" type names.
154b3e39 85;;
72c0ae01 86;; * Address handling:
154b3e39 87;;
72c0ae01
ER
88;; * parses rfc822 quoted text, comments, and domain literals.
89;; * parses rfc822 multi-line headers.
90;; * does something reasonable with rfc822 GROUP addresses.
91;; * handles many rfc822 noncompliant and garbage addresses.
92;; * canonicalizes addresses (after stripping comments/phrases outside <>).
93;; * converts ! addresses into .UUCP and %-style addresses.
94;; * converts rfc822 ROUTE addresses to %-style addresses.
95;; * truncates %-style addresses at leftmost fully qualified domain name.
96;; * handles local relative precedence of ! vs. % and @ (untested).
154b3e39 97;;
72c0ae01
ER
98;; It does almost no string creation. It primarily uses the built-in
99;; parsing routines with the appropriate syntax tables. This should
100;; result in greater speed.
154b3e39 101;;
72c0ae01 102;; TODO:
154b3e39 103;;
72c0ae01
ER
104;; * handle all test cases. (This will take forever.)
105;; * software to pick the correct header to use (eg., "Senders-Name:").
106;; * multiple addresses in the "From:" header (almost all of the necessary
107;; code is there).
108;; * flag to not treat `,' as an address separator. (This is useful when
109;; there is a "From:" header but no "Sender:" header, because then there
110;; is only allowed to be one address.)
111;; * mailbox name does not necessarily contain full name.
112;; * fixing capitalization when it's all upper or lowercase. (Hard!)
113;; * some of the domain literal handling is missing. (But I've never even
114;; seen one of these in a mail address, so maybe no big deal.)
115;; * arrange to have syntax tables byte-compiled.
116;; * speed hacks.
117;; * delete unused variables.
118;; * arrange for testing with different relative precedences of ! vs. @
119;; and %.
72c0ae01
ER
120;; * insert documentation strings!
121;; * handle X.400-gatewayed addresses according to RFC 1148.
122
6c83d99f
JB
123;;; Change Log:
124;;
154b3e39
RS
125;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com)
126;;
127;; * merged with jbw's latest version
128;;
129;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com)
130;;
131;; * high-bit chars in comments weren't treated as word syntax
132;;
133;; Sat Feb 5 03:13:40 1994 Jamie Zawinski (jwz@lucid.com)
134;;
135;; * call replace-match with fixed-case arg
136;;
137;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com)
138;;
139;; * some more cleanup, doc, added provide
140;;
141;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu)
6c83d99f 142;;
154b3e39 143;; * Made mail-full-name-prefixes a user-customizable variable.
79814626 144;; Allow passing the address as a buffer as well as a string.
154b3e39 145;; Allow [ and ] as name characters (Finnish character set).
6c83d99f 146;;
154b3e39 147;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 148;;
154b3e39
RS
149;; * Handle "null" addresses. Handle = used for spacing in mailbox
150;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are
151;; missing their brackets. Handle uppercase "JR". Extract full
152;; names from X.400 addresses encoded in RFC-822. Fix bug in
153;; handling of multiple addresses where first has trailing comment.
154;; Handle more kinds of telephone extension lead-ins.
6c83d99f 155;;
154b3e39 156;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 157;;
154b3e39 158;; * Handle HZ encoding for embedding GB encoded chinese characters.
6c83d99f 159;;
154b3e39 160;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 161;;
154b3e39
RS
162;; * Fixed too broad matching of ham radio call signs. Fixed bug in
163;; handling an unmatched ' in a name string. Enhanced recognition
164;; of when . in the mailbox name terminates the name portion.
165;; Narrowed conversion of . to space to only the necessary
166;; situation. Deal with VMS's stupid date stamps. Handle a unique
167;; way of introducing an alternate address. Fixed spacing bug I
168;; introduced in switching last name order. Fixed bug in handling
169;; address with ! and % but no @. Narrowed the cases in which
170;; certain trailing words are discarded.
6c83d99f 171;;
154b3e39 172;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 173;;
154b3e39
RS
174;; * Fixed bugs in handling GROUP addresses. Certain words in the
175;; middle of a name no longer terminate it. Handle LISTSERV list
176;; names. Ignore comment field containing mailbox name.
6c83d99f 177;;
154b3e39 178;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 179;;
154b3e39
RS
180;; * Moved variant-method code back into main function. Handle
181;; underscores as spaces in comments. Handle leading nickname. Add
182;; flag to ignore single-word names. Other changes.
6c83d99f 183;;
154b3e39 184;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 185;;
154b3e39
RS
186;; * Added in changes by Rod Whitby and Jamie Zawinski. This
187;; includes the flag mail-extr-guess-middle-initial and the fix for
6b07c06e
RS
188;; handling multiple addresses correctly. (Whitby just changed
189;; a > to a <.)
6c83d99f 190;;
72c0ae01 191;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 192;;
72c0ae01 193;; * Cleaned up some more. Release version 1.0 to world.
6c83d99f 194;;
72c0ae01 195;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 196;;
72c0ae01 197;; * Cleaned up full name extraction extensively.
6c83d99f 198;;
72c0ae01 199;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 200;;
72c0ae01
ER
201;; * Total rewrite. Integrated mail-canonicalize-address into
202;; mail-extract-address-components. Now handles GROUP addresses more
203;; or less correctly. Better handling of lots of different cases.
6c83d99f 204;;
72c0ae01
ER
205;; Fri Jun 14 19:39:50 1991
206;; * Created.
207
208;;; Code:
209\f
72c0ae01 210
0b5bb3ec
SE
211(defgroup mail-extr nil
212 "Extract full name and address from RFC 822 mail header."
213 :prefix "mail-extr-"
214 :group 'mail)
215
154b3e39
RS
216;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
217;;
218;; User configuration variable definitions.
219;;
220
0b5bb3ec 221(defcustom mail-extr-guess-middle-initial nil
fb7ada5f 222 "Whether to try to guess middle initial from mail address.
154b3e39 223If true, then when we see an address like \"John Smith <jqs@host.com>\"
0b5bb3ec
SE
224we will assume that \"John Q. Smith\" is the fellow's name."
225 :type 'boolean
226 :group 'mail-extr)
154b3e39 227
2a487c6c 228(defcustom mail-extr-ignore-single-names nil
fb7ada5f 229 "Whether to ignore a name that is just a single word.
154b3e39 230If true, then when we see an address like \"Idiot <dumb@stupid.com>\"
0b5bb3ec
SE
231we will act as though we couldn't find a full name in the address."
232 :type 'boolean
bf247b6e 233 :version "22.1"
0b5bb3ec 234 :group 'mail-extr)
154b3e39 235
67e61e16
EZ
236(defcustom mail-extr-ignore-realname-equals-mailbox-name t
237"*Whether to ignore a name that is equal to the mailbox name.
238If true, then when the address is like \"Single <single@address.com>\"
239we will act as though we couldn't find a full name in the address."
240 :type 'boolean
241 :group 'mail-extr)
242
154b3e39
RS
243;; Matches a leading title that is not part of the name (does not
244;; contribute to uniquely identifying the person).
0b5bb3ec 245(defcustom mail-extr-full-name-prefixes
154b3e39
RS
246 (purecopy
247 "\\(Prof\\|D[Rr]\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.?[ \t\n]")
fb7ada5f 248 "Matches prefixes to the full name that identify a person's position.
154b3e39 249These are stripped from the full name because they do not contribute to
0b5bb3ec
SE
250uniquely identifying the person."
251 :type 'regexp
252 :group 'mail-extr)
154b3e39 253
0b5bb3ec 254(defcustom mail-extr-@-binds-tighter-than-! nil
fb7ada5f 255 "Whether the local mail transport agent looks at ! before @."
0b5bb3ec
SE
256 :type 'boolean
257 :group 'mail-extr)
154b3e39 258
0b5bb3ec 259(defcustom mail-extr-mangle-uucp nil
fb7ada5f 260 "Whether to throw away information in UUCP addresses
0b5bb3ec
SE
261by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\"."
262 :type 'boolean
263 :group 'mail-extr)
72c0ae01
ER
264
265;;----------------------------------------------------------------------
266;; what orderings are meaningful?????
267;;(defvar mail-operator-precedence-list '(?! ?% ?@))
268;; Right operand of a % or a @ must be a domain name, period. No other
269;; operators allowed. Left operand of a @ is an address relative to that
270;; site.
271
272;; Left operand of a ! must be a domain name. Right operand is an
273;; arbitrary address.
274;;----------------------------------------------------------------------
275
154b3e39 276\f
72c0ae01 277
154b3e39
RS
278;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
279;;
280;; Constant definitions.
281;;
282
72c0ae01 283;; Any character that can occur in a name, not counting characters that
154b3e39
RS
284;; separate parts of a multipart name (hyphen and period).
285;; Yes, there are weird people with digits in their names.
286;; You will also notice the consideration for the
287;; Swedish/Finnish/Norwegian character set.
154b3e39 288(defconst mail-extr-all-letters-but-separators
6e66e4c6 289 (purecopy "][[:alnum:]{|}'~`"))
154b3e39
RS
290
291;; Any character that can occur in a name in an RFC822 address including
292;; the separator (hyphen and possibly period) for multipart names.
293;; #### should . be in here?
294(defconst mail-extr-all-letters
295 (purecopy (concat mail-extr-all-letters-but-separators "---")))
296
297;; Any character that can start a name.
298;; Keep this set as minimal as possible.
6e66e4c6 299(defconst mail-extr-first-letters (purecopy "[:alpha:]"))
72c0ae01
ER
300
301;; Any character that can end a name.
154b3e39 302;; Keep this set as minimal as possible.
6e66e4c6 303(defconst mail-extr-last-letters (purecopy "[:alpha:]`'."))
72c0ae01 304
af604656 305(defconst mail-extr-leading-garbage "\\W+")
72c0ae01 306
6c83d99f 307;; (defconst mail-extr-non-name-chars
154b3e39
RS
308;; (purecopy (concat "^" mail-extr-all-letters ".")))
309;; (defconst mail-extr-non-begin-name-chars
310;; (purecopy (concat "^" mail-extr-first-letters)))
311;; (defconst mail-extr-non-end-name-chars
312;; (purecopy (concat "^" mail-extr-last-letters)))
72c0ae01 313
6c83d99f 314;; Matches an initial not followed by both a period and a space.
154b3e39 315;; (defconst mail-extr-bad-initials-pattern
6c83d99f 316;; (purecopy
154b3e39
RS
317;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
318;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters)))
72c0ae01
ER
319
320;; Matches periods used instead of spaces. Must not match the period
321;; following an initial.
154b3e39
RS
322(defconst mail-extr-bad-dot-pattern
323 (purecopy
324 (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
325 mail-extr-all-letters
326 mail-extr-last-letters
327 mail-extr-first-letters)))
72c0ae01
ER
328
329;; Matches an embedded or leading nickname that should be removed.
154b3e39
RS
330;; (defconst mail-extr-nickname-pattern
331;; (purecopy
332;; (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
333;; mail-extr-all-letters)))
72c0ae01
ER
334
335;; Matches the occurrence of a generational name suffix, and the last
154b3e39
RS
336;; character of the preceding name. This is important because we want to
337;; keep such suffixes: they help to uniquely identify the person.
338;; *** Perhaps this should be a user-customizable variable. However, the
339;; *** regular expression is fairly tricky to alter, so maybe not.
340(defconst mail-extr-full-name-suffix-pattern
341 (purecopy
342 (format
343 "\\(,? ?\\([JjSs][Rr]\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
344 mail-extr-all-letters mail-extr-all-letters)))
345
346(defconst mail-extr-roman-numeral-pattern (purecopy "V?I+V?\\b"))
72c0ae01
ER
347
348;; Matches a trailing uppercase (with other characters possible) acronym.
349;; Must not match a trailing uppercase last name or trailing initial
154b3e39
RS
350(defconst mail-extr-weird-acronym-pattern
351 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)"))
6c83d99f 352
72c0ae01 353;; Matches a mixed-case or lowercase name (not an initial).
154b3e39
RS
354;; #### Match Latin1 lower case letters here too?
355;; (defconst mail-extr-mixed-case-name-pattern
356;; (purecopy
357;; (format
358;; "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
359;; mail-extr-all-letters mail-extr-last-letters
360;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters
361;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters)))
72c0ae01
ER
362
363;; Matches a trailing alternative address.
154b3e39 364;; #### Match Latin1 letters here too?
6c83d99f 365;; #### Match _ before @ here too?
154b3e39
RS
366(defconst mail-extr-alternative-address-pattern
367 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]"))
72c0ae01
ER
368
369;; Matches a variety of trailing comments not including comma-delimited
370;; comments.
154b3e39
RS
371(defconst mail-extr-trailing-comment-start-pattern
372 (purecopy " [-{]\\|--\\|[+@#></\;]"))
72c0ae01
ER
373
374;; Matches a name (not an initial).
375;; This doesn't force a word boundary at the end because sometimes a
376;; comment is separated by a `-' with no preceding space.
154b3e39
RS
377(defconst mail-extr-name-pattern
378 (purecopy (format "\\b[%s][%s]*[%s]"
379 mail-extr-first-letters
380 mail-extr-all-letters
381 mail-extr-last-letters)))
72c0ae01 382
154b3e39
RS
383(defconst mail-extr-initial-pattern
384 (purecopy (format "\\b[%s]\\([. ]\\|\\b\\)" mail-extr-first-letters)))
72c0ae01
ER
385
386;; Matches a single name before a comma.
154b3e39
RS
387;; (defconst mail-extr-last-name-first-pattern
388;; (purecopy (concat "\\`" mail-extr-name-pattern ",")))
72c0ae01
ER
389
390;; Matches telephone extensions.
154b3e39
RS
391(defconst mail-extr-telephone-extension-pattern
392 (purecopy
393 "\\(\\([Ee]xt\\|\\|[Tt]ph\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+"))
72c0ae01
ER
394
395;; Matches ham radio call signs.
154b3e39
RS
396;; Help from: Mat Maessen N2NJZ <maessm@rpi.edu>, Mark Feit
397;; <mark@era.com>, Michael Covington <mcovingt@ai.uga.edu>.
5a5fa834 398;; Examples: DX504 DX515 K5MRU K8DHK KA9WGN KD3FU KD6EUI KD6HBW
154b3e39
RS
399;; KE9TV KF0NV N1API N3FU N3GZE N3IGS N4KCC N7IKQ N9HHU W4YHF W6ANK WA2SUH
400;; WB7VZI N2NJZ NR3G KJ4KK AB4UM AL7NI KH6OH WN3KBT N4TMI W1A N0NZO
401(defconst mail-extr-ham-call-sign-pattern
402 (purecopy "\\b\\(DX[0-9]+\\|[AKNW][A-Z]?[0-9][A-Z][A-Z]?[A-Z]?\\)"))
403
404;; Possible trailing suffixes: "\\(/\\(KT\\|A[AEG]\\|[R0-9]\\)\\)?"
405;; /KT == Temporary Technician (has CSC but not "real" license)
406;; /AA == Temporary Advanced
407;; /AE == Temporary Extra
408;; /AG == Temporary General
409;; /R == repeater
410;; /# == stations operating out of home district
411;; I don't include these in the regexp above because I can't imagine
412;; anyone putting them with their name in an e-mail address.
72c0ae01
ER
413
414;; Matches normal single-part name
154b3e39
RS
415(defconst mail-extr-normal-name-pattern
416 (purecopy (format "\\b[%s][%s]+[%s]"
417 mail-extr-first-letters
418 mail-extr-all-letters-but-separators
419 mail-extr-last-letters)))
420
421;; Matches a single word name.
422;; (defconst mail-extr-one-name-pattern
423;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'")))
6c83d99f 424
72c0ae01 425;; Matches normal two names with missing middle initial
154b3e39
RS
426;; The first name is not allowed to have a hyphen because this can cause
427;; false matches where the "middle initial" is actually the first letter
428;; of the second part of the first name.
429(defconst mail-extr-two-name-pattern
430 (purecopy
431 (concat "\\`\\(" mail-extr-normal-name-pattern
432 "\\|" mail-extr-initial-pattern
433 "\\) +\\(" mail-extr-name-pattern "\\)\\(,\\|\\'\\)")))
434
435(defconst mail-extr-listserv-list-name-pattern
436 (purecopy "Multiple recipients of list \\([-A-Z]+\\)"))
437
438(defconst mail-extr-stupid-vms-date-stamp-pattern
439 (purecopy
440 "[0-9][0-9]-[JFMASOND][aepuco][nbrylgptvc]-[0-9][0-9][0-9][0-9] [0-9]+ *"))
441
442;;; HZ -- GB (PRC Chinese character encoding) in ASCII embedding protocol
443;;
444;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is
445;; encountered. The character '~' is an escape character. By convention, it
446;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the
447;; following special meaning.
6c83d99f 448;;
154b3e39
RS
449;; o The escape sequence '~~' is interpreted as a '~'.
450;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
451;; o The escape sequence '~\n' is a line-continuation marker to be consumed
452;; with no output produced.
6c83d99f 453;;
154b3e39
RS
454;; In GB mode, characters are interpreted two bytes at a time as (pure) GB
455;; codes until the escape-from-GB code '~}' is read. This code switches the
456;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}'
457;; ($7E7D) is outside the defined GB range.)
458(defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern
459 (purecopy "~{\\([^~].\\|~[^\}]\\)+~}"))
460
461;; The leading optional lowercase letters are for a bastardized version of
462;; the encoding, as is the optional nature of the final slash.
463(defconst mail-extr-x400-encoded-address-pattern
464 (purecopy "[a-z]?[a-z]?\\(/[A-Za-z]+\\(\\.[A-Za-z]+\\)?=[^/]+\\)+/?\\'"))
465
466(defconst mail-extr-x400-encoded-address-field-pattern-format
467 (purecopy "/%s=\\([^/]+\\)\\(/\\|\\'\\)"))
468
469(defconst mail-extr-x400-encoded-address-surname-pattern
470 ;; S stands for Surname (family name).
471 (purecopy
472 (format mail-extr-x400-encoded-address-field-pattern-format "[Ss]")))
473
474(defconst mail-extr-x400-encoded-address-given-name-pattern
475 ;; G stands for Given name.
476 (purecopy
477 (format mail-extr-x400-encoded-address-field-pattern-format "[Gg]")))
478
479(defconst mail-extr-x400-encoded-address-full-name-pattern
480 ;; PN stands for Personal Name. When used it represents the combination
481 ;; of the G and S fields.
482 ;; "The one system I used having this field asked it with the prompt
483 ;; `Personal Name'. But they mapped it into G and S on outgoing real
484 ;; X.400 addresses. As they mapped G and S into PN on incoming..."
485 (purecopy
486 (format mail-extr-x400-encoded-address-field-pattern-format "[Pp][Nn]")))
487
488\f
489
490;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
491;;
492;; Syntax tables used for quick parsing.
493;;
494
495(defconst mail-extr-address-syntax-table (make-syntax-table))
496(defconst mail-extr-address-comment-syntax-table (make-syntax-table))
497(defconst mail-extr-address-domain-literal-syntax-table (make-syntax-table))
498(defconst mail-extr-address-text-comment-syntax-table (make-syntax-table))
499(defconst mail-extr-address-text-syntax-table (make-syntax-table))
d980c402
SM
500(mapc
501 (lambda (pair)
502 (let ((syntax-table (symbol-value (car pair))))
503 (dolist (item (cdr pair))
504 (if (eq 2 (length item))
505 ;; modifying syntax of a single character
506 (modify-syntax-entry (car item) (car (cdr item)) syntax-table)
507 ;; modifying syntax of a range of characters
508 (let ((char (nth 0 item))
509 (bound (nth 1 item))
510 (syntax (nth 2 item)))
511 (while (<= char bound)
512 (modify-syntax-entry char syntax syntax-table)
513 (setq char (1+ char))))))))
154b3e39
RS
514 '((mail-extr-address-syntax-table
515 (?\000 ?\037 "w") ;control characters
516 (?\040 " ") ;SPC
517 (?! ?~ "w") ;printable characters
518 (?\177 "w") ;DEL
72c0ae01
ER
519 (?\t " ")
520 (?\r " ")
521 (?\n " ")
522 (?\( ".")
523 (?\) ".")
524 (?< ".")
525 (?> ".")
526 (?@ ".")
527 (?, ".")
528 (?\; ".")
529 (?: ".")
530 (?\\ "\\")
531 (?\" "\"")
532 (?. ".")
533 (?\[ ".")
534 (?\] ".")
535 ;; % and ! aren't RFC822 characters, but it is convenient to pretend
536 (?% ".")
154b3e39 537 (?! ".") ;; this needs to be word-constituent when not in .UUCP mode
72c0ae01 538 )
154b3e39
RS
539 (mail-extr-address-comment-syntax-table
540 (?\000 ?\377 "w")
541 (?\040 " ")
542 (?\240 " ")
543 (?\t " ")
544 (?\r " ")
545 (?\n " ")
72c0ae01
ER
546 (?\( "\(\)")
547 (?\) "\)\(")
548 (?\\ "\\"))
154b3e39
RS
549 (mail-extr-address-domain-literal-syntax-table
550 (?\000 ?\377 "w")
551 (?\040 " ")
552 (?\240 " ")
553 (?\t " ")
554 (?\r " ")
555 (?\n " ")
72c0ae01
ER
556 (?\[ "\(\]") ;??????
557 (?\] "\)\[") ;??????
558 (?\\ "\\"))
154b3e39
RS
559 (mail-extr-address-text-comment-syntax-table
560 (?\000 ?\377 "w")
561 (?\040 " ")
562 (?\240 " ")
563 (?\t " ")
564 (?\r " ")
565 (?\n " ")
72c0ae01
ER
566 (?\( "\(\)")
567 (?\) "\)\(")
568 (?\[ "\(\]")
569 (?\] "\)\[")
570 (?\{ "\(\}")
571 (?\} "\)\{")
572 (?\\ "\\")
573 (?\" "\"")
574 ;; (?\' "\)\`")
575 ;; (?\` "\(\'")
576 )
154b3e39
RS
577 (mail-extr-address-text-syntax-table
578 (?\000 ?\177 ".")
579 (?\200 ?\377 "w")
580 (?\040 " ")
581 (?\t " ")
582 (?\r " ")
583 (?\n " ")
72c0ae01
ER
584 (?A ?Z "w")
585 (?a ?z "w")
586 (?- "w")
587 (?\} "w")
588 (?\{ "w")
589 (?| "w")
590 (?\' "w")
591 (?~ "w")
592 (?0 ?9 "w"))
593 ))
594
595\f
154b3e39
RS
596;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
597;;
72c0ae01 598;; Utility functions and macros.
154b3e39
RS
599;;
600
ff9ed544
DL
601;; Fixme: There are Latin-1 nbsp below. If such characters should be
602;; included, this is the wrong thing to do -- it should use syntax (or
603;; regexp char classes).
604
7a9ebd0b 605(defsubst mail-extr-skip-whitespace-forward ()
154b3e39 606 ;; v19 fn skip-syntax-forward is more tasteful, but not byte-coded.
b1ccc1ce 607 (skip-chars-forward " \t\n\r "))
154b3e39 608
7a9ebd0b 609(defsubst mail-extr-skip-whitespace-backward ()
154b3e39 610 ;; v19 fn skip-syntax-backward is more tasteful, but not byte-coded.
b1ccc1ce 611 (skip-chars-backward " \t\n\r "))
7a9ebd0b 612
72c0ae01 613
7a9ebd0b
SM
614(defsubst mail-extr-undo-backslash-quoting (beg end)
615 (save-excursion
616 (save-restriction
617 (narrow-to-region beg end)
618 (goto-char (point-min))
619 ;; undo \ quoting
620 (while (search-forward "\\" nil t)
d980c402 621 (delete-char -1)
7a9ebd0b
SM
622 (or (eobp)
623 (forward-char 1))))))
624
625(defsubst mail-extr-nuke-char-at (pos)
626 (save-excursion
627 (goto-char pos)
d980c402 628 (delete-char 1)
7a9ebd0b 629 (insert ?\ )))
154b3e39
RS
630
631(put 'mail-extr-nuke-outside-range
632 'edebug-form-spec '(symbolp &optional form form atom))
633
634(defmacro mail-extr-nuke-outside-range (list-symbol
635 beg-symbol end-symbol
636 &optional no-replace)
d980c402
SM
637 "Delete all elements outside BEG..END in LIST.
638LIST-SYMBOL names a variable holding a list of buffer positions
639BEG-SYMBOL and END-SYMBOL name variables delimiting a range
640Each element of LIST-SYMBOL which lies outside of the range is
641 deleted from the list.
642Unless NO-REPLACE is true, at each of the positions in LIST-SYMBOL
643 which lie outside of the range, one character at that position is
644 replaced with a SPC."
154b3e39 645 (or (memq no-replace '(t nil))
29565a87 646 (error "no-replace must be t or nil, evaluable at macroexpand-time"))
d980c402 647 `(let ((temp ,list-symbol)
154b3e39 648 ch)
72c0ae01 649 (while temp
154b3e39 650 (setq ch (car temp))
d980c402
SM
651 (when (or (> ch ,end-symbol)
652 (< ch ,beg-symbol))
653 ,@(if no-replace
654 nil
655 `((mail-extr-nuke-char-at ch)))
656 (setcar temp nil))
72c0ae01 657 (setq temp (cdr temp)))
d980c402 658 (setq ,list-symbol (delq nil ,list-symbol))))
72c0ae01 659
154b3e39
RS
660(defun mail-extr-demarkerize (marker)
661 ;; if arg is a marker, destroys the marker, then returns the old value.
662 ;; otherwise returns the arg.
663 (if (markerp marker)
664 (let ((temp (marker-position marker)))
665 (set-marker marker nil)
666 temp)
667 marker))
668
669(defun mail-extr-markerize (pos)
670 ;; coerces pos to a marker if non-nil.
671 (if (or (markerp pos) (null pos))
672 pos
673 (copy-marker pos)))
674
7a9ebd0b 675(defsubst mail-extr-safe-move-sexp (arg)
154b3e39 676 ;; Safely skip over one balanced sexp, if there is one. Return t if success.
7a9ebd0b
SM
677 (condition-case error
678 (progn
679 (goto-char (or (scan-sexps (point) arg) (point)))
680 t)
681 (error
682 ;; #### kludge kludge kludge kludge kludge kludge kludge !!!
683 (if (string-equal (nth 1 error) "Unbalanced parentheses")
684 nil
685 (while t
686 (signal (car error) (cdr error)))))))
72c0ae01 687\f
154b3e39
RS
688;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
689;;
72c0ae01 690;; The main function to grind addresses
154b3e39
RS
691;;
692
693(defvar disable-initial-guessing-flag) ; dynamic assignment
0e4bb7f0
GM
694(defvar mailextr-cbeg) ; dynamic assignment
695(defvar mailextr-cend) ; dynamic assignment
4d96f7e7 696(defvar mail-extr-all-top-level-domains) ; Defined below.
72c0ae01 697
154b3e39 698;;;###autoload
d1782bd8
KH
699(defun mail-extract-address-components (address &optional all)
700 "Given an RFC-822 address ADDRESS, extract full name and canonical address.
e8f4db18
RS
701Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). If no
702name can be extracted, FULL-NAME will be nil. Also see
703`mail-extr-ignore-single-names' and
704`mail-extr-ignore-realname-equals-mailbox-name'.
d1782bd8
KH
705
706If the optional argument ALL is non-nil, then ADDRESS can contain zero
707or more recipients, separated by commas, and we return a list of
708the form ((FULL-NAME CANONICAL-ADDRESS) ...) with one element for
709each recipient. If ALL is nil, then if ADDRESS contains more than
710one recipients, all but the first is ignored.
711
7a9ebd0b 712ADDRESS may be a string or a buffer. If it is a buffer, the visible
2a487c6c
SJ
713\(narrowed) portion of the buffer will be interpreted as the address.
714\(This feature exists so that the clever caller might be able to avoid
715consing a string.)"
154b3e39
RS
716 (let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
717 (extraction-buffer (get-buffer-create " *extract address components*"))
d1782bd8
KH
718 value-list)
719
7a9ebd0b 720 (with-current-buffer (get-buffer-create extraction-buffer)
154b3e39 721 (fundamental-mode)
e8a57935 722 (buffer-disable-undo extraction-buffer)
154b3e39 723 (set-syntax-table mail-extr-address-syntax-table)
72c0ae01
ER
724 (widen)
725 (erase-buffer)
726 (setq case-fold-search nil)
6c83d99f 727
72c0ae01
ER
728 ;; Insert extra space at beginning to allow later replacement with <
729 ;; without having to move markers.
154b3e39
RS
730 (insert ?\ )
731
732 ;; Insert the address itself.
733 (cond ((stringp address)
734 (insert address))
735 ((bufferp address)
736 (insert-buffer-substring address))
737 (t
29565a87 738 (error "Invalid address: %s" address)))
03007ccc
RS
739
740 (set-text-properties (point-min) (point-max) nil)
d1782bd8 741
7a9ebd0b 742 (with-current-buffer (get-buffer-create canonicalization-buffer)
d1782bd8
KH
743 (fundamental-mode)
744 (buffer-disable-undo canonicalization-buffer)
d1782bd8
KH
745 (setq case-fold-search nil))
746
6c83d99f 747
72c0ae01
ER
748 ;; Unfold multiple lines.
749 (goto-char (point-min))
750 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
751 (replace-match "\\1 " t))
6c83d99f 752
d1782bd8
KH
753 ;; Loop over addresses until we have as many as we want.
754 (while (and (or all (null value-list))
755 (progn (goto-char (point-min))
756 (skip-chars-forward " \t")
757 (not (eobp))))
758 (let (char
759 end-of-address
f98b752f 760 <-pos >-pos @-pos colon-pos comma-pos !-pos %-pos \;-pos
d1782bd8
KH
761 group-:-pos group-\;-pos route-addr-:-pos
762 record-pos-symbol
763 first-real-pos last-real-pos
764 phrase-beg phrase-end
0e4bb7f0
GM
765 ;; Dynamically set in mail-extr-voodoo.
766 mailextr-cbeg mailextr-cend
d1782bd8
KH
767 quote-beg quote-end
768 atom-beg atom-end
769 mbox-beg mbox-end
770 \.-ends-name
771 temp
772 ;; name-suffix
773 fi mi li ; first, middle, last initial
774 saved-%-pos saved-!-pos saved-@-pos
775 domain-pos \.-pos insert-point
776 ;; mailbox-name-processed-flag
777 disable-initial-guessing-flag) ; dynamically set from -voodoo
778
7a9ebd0b 779 (set-syntax-table mail-extr-address-syntax-table)
d1782bd8
KH
780 (goto-char (point-min))
781
782 ;; Insert extra space at beginning to allow later replacement with <
783 ;; without having to move markers.
784 (or (eq (following-char) ?\ )
785 (insert ?\ ))
786
787 ;; First pass grabs useful information about address.
788 (while (progn
789 (mail-extr-skip-whitespace-forward)
790 (not (eobp)))
791 (setq char (char-after (point)))
792 (or first-real-pos
793 (if (not (eq char ?\())
794 (setq first-real-pos (point))))
795 (cond
796 ;; comment
797 ((eq char ?\()
798 (set-syntax-table mail-extr-address-comment-syntax-table)
799 ;; only record the first non-empty comment's position
0e4bb7f0 800 (if (and (not mailextr-cbeg)
d1782bd8
KH
801 (save-excursion
802 (forward-char 1)
803 (mail-extr-skip-whitespace-forward)
804 (not (eq ?\) (char-after (point))))))
0e4bb7f0 805 (setq mailextr-cbeg (point)))
d1782bd8
KH
806 ;; TODO: don't record if unbalanced
807 (or (mail-extr-safe-move-sexp 1)
808 (forward-char 1))
809 (set-syntax-table mail-extr-address-syntax-table)
0e4bb7f0
GM
810 (if (and mailextr-cbeg
811 (not mailextr-cend))
812 (setq mailextr-cend (point))))
d1782bd8
KH
813 ;; quoted text
814 ((eq char ?\")
815 ;; only record the first non-empty quote's position
816 (if (and (not quote-beg)
817 (save-excursion
818 (forward-char 1)
819 (mail-extr-skip-whitespace-forward)
820 (not (eq ?\" (char-after (point))))))
821 (setq quote-beg (point)))
822 ;; TODO: don't record if unbalanced
823 (or (mail-extr-safe-move-sexp 1)
824 (forward-char 1))
825 (if (and quote-beg
826 (not quote-end))
827 (setq quote-end (point))))
828 ;; domain literals
829 ((eq char ?\[)
830 (set-syntax-table mail-extr-address-domain-literal-syntax-table)
831 (or (mail-extr-safe-move-sexp 1)
832 (forward-char 1))
833 (set-syntax-table mail-extr-address-syntax-table))
834 ;; commas delimit addresses when outside < > pairs.
835 ((and (eq char ?,)
836 (or (and (null <-pos)
837 ;; Handle ROUTE-ADDR address that is missing its <.
838 (not (eq ?@ (char-after (1+ (point))))))
839 (and >-pos
840 ;; handle weird munged addresses
841 ;; BUG FIX: This test was reversed. Thanks to the
842 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
843 ;; for discovering this!
7a9ebd0b 844 (< (car (last <-pos)) (car >-pos)))))
d1782bd8
KH
845 ;; The argument contains more than one address.
846 ;; Temporarily hide everything after this one.
7a9ebd0b 847 (setq end-of-address (copy-marker (1+ (point)) t))
d1782bd8 848 (narrow-to-region (point-min) (1+ (point)))
7a9ebd0b 849 (delete-char 1)
d1782bd8
KH
850 (setq char ?\() ; HAVE I NO SHAME??
851 )
852 ;; record the position of various interesting chars, determine
4f9c00e2 853 ;; validity later.
d1782bd8
KH
854 ((setq record-pos-symbol
855 (cdr (assq char
856 '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
f98b752f 857 (?: . colon-pos) (?, . comma-pos) (?! . !-pos)
d1782bd8
KH
858 (?% . %-pos) (?\; . \;-pos)))))
859 (set record-pos-symbol
860 (cons (point) (symbol-value record-pos-symbol)))
72c0ae01 861 (forward-char 1))
d1782bd8 862 ((eq char ?.)
72c0ae01 863 (forward-char 1))
d1782bd8 864 ((memq char '(
4f9c00e2 865 ;; comment terminator invalid
d1782bd8 866 ?\)
4f9c00e2 867 ;; domain literal terminator invalid
d1782bd8
KH
868 ?\]
869 ;; \ allowed only within quoted strings,
870 ;; domain literals, and comments
871 ?\\
872 ))
873 (mail-extr-nuke-char-at (point))
72c0ae01 874 (forward-char 1))
d1782bd8 875 (t
b907b629
KH
876 ;; Do `(forward-word 1)', recognizing non-ASCII characters
877 ;; except Latin-1 nbsp as words.
878 (while (progn
879 (skip-chars-forward "^\000-\177 ")
880 (and (not (eobp))
881 (eq ?w (char-syntax (char-after)))
882 (progn
883 (forward-word 1)
884 (and (not (eobp))
885 (> (char-after) ?\177)
886 (not (eq (char-after) ? )))))))))
d1782bd8
KH
887 (or (eq char ?\()
888 ;; At the end of first address of a multiple address header.
889 (and (eq char ?,)
890 (eobp))
891 (setq last-real-pos (point))))
892
893 ;; Use only the leftmost <, if any. Replace all others with spaces.
894 (while (cdr <-pos)
895 (mail-extr-nuke-char-at (car <-pos))
896 (setq <-pos (cdr <-pos)))
897
898 ;; Use only the rightmost >, if any. Replace all others with spaces.
899 (while (cdr >-pos)
900 (mail-extr-nuke-char-at (nth 1 >-pos))
901 (setcdr >-pos (nthcdr 2 >-pos)))
902
903 ;; If multiple @s and a :, but no < and >, insert around buffer.
904 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
905 ;; This commonly happens on the UUCP "From " line. Ugh.
d980c402 906 (when (and (> (length @-pos) 1)
f98b752f 907 (eq 1 (length colon-pos)) ;TODO: check if between last two @s
d1782bd8
KH
908 (not \;-pos)
909 (not <-pos))
d980c402
SM
910 (goto-char (point-min))
911 (delete-char 1)
912 (setq <-pos (list (point)))
913 (insert ?<))
d1782bd8
KH
914
915 ;; If < but no >, insert > in rightmost possible position
d980c402
SM
916 (when (and <-pos (null >-pos))
917 (goto-char (point-max))
918 (setq >-pos (list (point)))
919 (insert ?>))
d1782bd8
KH
920
921 ;; If > but no <, replace > with space.
d980c402
SM
922 (when (and >-pos (null <-pos))
923 (mail-extr-nuke-char-at (car >-pos))
924 (setq >-pos nil))
d1782bd8
KH
925
926 ;; Turn >-pos and <-pos into non-lists
927 (setq >-pos (car >-pos)
928 <-pos (car <-pos))
929
930 ;; Trim other punctuation lists of items outside < > pair to handle
931 ;; stupid MTAs.
d980c402
SM
932 (when <-pos ; don't need to check >-pos also
933 ;; handle bozo software that violates RFC 822 by sticking
934 ;; punctuation marks outside of a < > pair
935 (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
936 ;; RFC 822 says nothing about these two outside < >, but
937 ;; remove those positions from the lists to make things
938 ;; easier.
939 (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
940 (mail-extr-nuke-outside-range %-pos <-pos >-pos t))
d1782bd8
KH
941
942 ;; Check for : that indicates GROUP list and for : part of
943 ;; ROUTE-ADDR spec.
944 ;; Can't possibly be more than two :. Nuke any extra.
f98b752f
KH
945 (while colon-pos
946 (setq temp (car colon-pos)
947 colon-pos (cdr colon-pos))
d1782bd8
KH
948 (cond ((and <-pos >-pos
949 (> temp <-pos)
950 (< temp >-pos))
951 (if (or route-addr-:-pos
952 (< (length @-pos) 2)
953 (> temp (car @-pos))
954 (< temp (nth 1 @-pos)))
955 (mail-extr-nuke-char-at temp)
956 (setq route-addr-:-pos temp)))
957 ((or (not <-pos)
958 (and <-pos
959 (< temp <-pos)))
960 (setq group-:-pos temp))))
961
962 ;; Nuke any ; that is in or to the left of a < > pair or to the left
963 ;; of a GROUP starting :. Also, there may only be one ;.
964 (while \;-pos
965 (setq temp (car \;-pos)
966 \;-pos (cdr \;-pos))
967 (cond ((and <-pos >-pos
968 (> temp <-pos)
969 (< temp >-pos))
970 (mail-extr-nuke-char-at temp))
971 ((and (or (not group-:-pos)
972 (> temp group-:-pos))
973 (not group-\;-pos))
974 (setq group-\;-pos temp))))
975
976 ;; Nuke unmatched GROUP syntax characters.
d980c402
SM
977 (when (and group-:-pos (not group-\;-pos))
978 ;; *** Do I really need to erase it?
979 (mail-extr-nuke-char-at group-:-pos)
980 (setq group-:-pos nil))
981 (when (and group-\;-pos (not group-:-pos))
982 ;; *** Do I really need to erase it?
983 (mail-extr-nuke-char-at group-\;-pos)
984 (setq group-\;-pos nil))
d1782bd8
KH
985
986 ;; Handle junk like ";@host.company.dom" that sendmail adds.
987 ;; **** should I remember comment positions?
d980c402 988 (when group-\;-pos
d1782bd8
KH
989 ;; this is fine for now
990 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
991 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
992 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
993 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t)
994 (and last-real-pos
995 (> last-real-pos (1+ group-\;-pos))
996 (setq last-real-pos (1+ group-\;-pos)))
997 ;; *** This may be wrong:
0e4bb7f0
GM
998 (and mailextr-cend
999 (> mailextr-cend group-\;-pos)
1000 (setq mailextr-cend nil
1001 mailextr-cbeg nil))
d1782bd8
KH
1002 (and quote-end
1003 (> quote-end group-\;-pos)
1004 (setq quote-end nil
1005 quote-beg nil))
1006 ;; This was both wrong and unnecessary:
1007 ;;(narrow-to-region (point-min) group-\;-pos)
1008
1009 ;; *** The entire handling of GROUP addresses seems rather lame.
1010 ;; *** It deserves a complete rethink, except that these addresses
1011 ;; *** are hardly ever seen.
d980c402 1012 )
d1782bd8
KH
1013
1014 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
1015 ;; others.
6c83d99f 1016 ;; Hell, go ahead and nuke all of the commas.
d1782bd8
KH
1017 ;; **** This will cause problems when we start handling commas in
1018 ;; the PHRASE part .... no it won't ... yes it will ... ?????
1019 (mail-extr-nuke-outside-range comma-pos 1 1)
1020
1021 ;; can only have multiple @s inside < >. The fact that some MTAs
1022 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
1023 ;; handled above.
1024
1025 ;; Locate PHRASE part of ROUTE-ADDR.
d980c402
SM
1026 (when <-pos
1027 (goto-char <-pos)
1028 (mail-extr-skip-whitespace-backward)
1029 (setq phrase-end (point))
1030 (goto-char (or ;;group-:-pos
1031 (point-min)))
1032 (mail-extr-skip-whitespace-forward)
1033 (if (< (point) phrase-end)
1034 (setq phrase-beg (point))
1035 (setq phrase-end nil)))
d1782bd8
KH
1036
1037 ;; handle ROUTE-ADDRS with real ROUTEs.
1038 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
1039 ;; any % or ! must be semantically meaningless.
1040 ;; TODO: do this processing into canonicalization buffer
d980c402
SM
1041 (when route-addr-:-pos
1042 (setq !-pos nil
1043 %-pos nil
1044 >-pos (copy-marker >-pos)
1045 route-addr-:-pos (copy-marker route-addr-:-pos))
1046 (goto-char >-pos)
1047 (insert-before-markers ?X)
1048 (goto-char (car @-pos))
1049 (while (setq @-pos (cdr @-pos))
1050 (delete-char 1)
1051 (setq %-pos (cons (point-marker) %-pos))
1052 (insert "%")
1053 (goto-char (1- >-pos))
1054 (save-excursion
1055 (insert-buffer-substring extraction-buffer
1056 (car @-pos) route-addr-:-pos)
1057 (delete-region (car @-pos) route-addr-:-pos))
1058 (or (cdr @-pos)
1059 (setq saved-@-pos (list (point)))))
1060 (setq @-pos saved-@-pos)
1061 (goto-char >-pos)
1062 (delete-char -1)
1063 (mail-extr-nuke-char-at route-addr-:-pos)
1064 (mail-extr-demarkerize route-addr-:-pos)
1065 (setq route-addr-:-pos nil
1066 >-pos (mail-extr-demarkerize >-pos)
1067 %-pos (mapcar 'mail-extr-demarkerize %-pos)))
d1782bd8
KH
1068
1069 ;; de-listify @-pos
1070 (setq @-pos (car @-pos))
1071
1072 ;; TODO: remove comments in the middle of an address
1073
d980c402 1074 (with-current-buffer canonicalization-buffer
d1782bd8
KH
1075 (widen)
1076 (erase-buffer)
1077 (insert-buffer-substring extraction-buffer)
1078
1079 (if <-pos
1080 (narrow-to-region (progn
1081 (goto-char (1+ <-pos))
1082 (mail-extr-skip-whitespace-forward)
1083 (point))
1084 >-pos)
1085 (if (and first-real-pos last-real-pos)
1086 (narrow-to-region first-real-pos last-real-pos)
1087 ;; ****** Oh no! What if the address is completely empty!
1088 ;; *** Is this correct?
d980c402 1089 (narrow-to-region (point-max) (point-max))))
d1782bd8
KH
1090
1091 (and @-pos %-pos
1092 (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
1093 (and %-pos !-pos
1094 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
1095 (and @-pos !-pos (not %-pos)
1096 (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
1097
1098 ;; Error condition:?? (and %-pos (not @-pos))
1099
1100 ;; WARNING: THIS CODE IS DUPLICATED BELOW.
d980c402
SM
1101 (when (and %-pos (not @-pos))
1102 (goto-char (car %-pos))
1103 (delete-char 1)
1104 (setq @-pos (point))
1105 (insert "@")
1106 (setq %-pos (cdr %-pos)))
1107
1108 (when (and mail-extr-mangle-uucp !-pos)
1109 ;; **** I don't understand this save-restriction and the
1110 ;; narrow-to-region inside it. Why did I do that?
1111 (save-restriction
1112 (cond ((and @-pos
1113 mail-extr-@-binds-tighter-than-!)
1114 (goto-char @-pos)
1115 (setq %-pos (cons (point) %-pos)
1116 @-pos nil)
1117 (delete-char 1)
1118 (insert "%")
1119 (setq insert-point (point-max)))
1120 (mail-extr-@-binds-tighter-than-!
1121 (setq insert-point (point-max)))
1122 (%-pos
1123 (setq insert-point (car (last %-pos))
1124 saved-%-pos (mapcar 'mail-extr-markerize %-pos)
1125 %-pos nil
1126 @-pos (mail-extr-markerize @-pos)))
1127 (@-pos
1128 (setq insert-point @-pos)
1129 (setq @-pos (mail-extr-markerize @-pos)))
1130 (t
1131 (setq insert-point (point-max))))
1132 (narrow-to-region (point-min) insert-point)
1133 (setq saved-!-pos (car !-pos))
1134 (while !-pos
1135 (goto-char (point-max))
1136 (cond ((and (not @-pos)
1137 (not (cdr !-pos)))
1138 (setq @-pos (point))
1139 (insert-before-markers "@ "))
1140 (t
1141 (setq %-pos (cons (point) %-pos))
1142 (insert-before-markers "% ")))
1143 (backward-char 1)
1144 (insert-buffer-substring
1145 (current-buffer)
1146 (if (nth 1 !-pos)
1147 (1+ (nth 1 !-pos))
1148 (point-min))
1149 (car !-pos))
1150 (delete-char 1)
1151 (or (save-excursion
1152 (mail-extr-safe-move-sexp -1)
1153 (mail-extr-skip-whitespace-backward)
1154 (eq ?. (preceding-char)))
1155 (insert-before-markers
1156 (if (save-excursion
1157 (mail-extr-skip-whitespace-backward)
1158 (eq ?. (preceding-char)))
1159 ""
1160 ".")
1161 "uucp"))
1162 (setq !-pos (cdr !-pos))))
1163 (and saved-%-pos
1164 (setq %-pos (append (mapcar 'mail-extr-demarkerize
1165 saved-%-pos)
1166 %-pos)))
1167 (setq @-pos (mail-extr-demarkerize @-pos))
1168 (narrow-to-region (1+ saved-!-pos) (point-max)))
d1782bd8
KH
1169
1170 ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
d980c402
SM
1171 (when (and %-pos (not @-pos))
1172 (goto-char (car %-pos))
1173 (delete-char 1)
1174 (setq @-pos (point))
1175 (insert "@")
1176 (setq %-pos (cdr %-pos)))
1177
1178 (when (setq %-pos (nreverse %-pos)) ; implies @-pos valid
1179 (setq temp %-pos)
1180 (catch 'truncated
1181 (while temp
1182 (goto-char (or (nth 1 temp)
1183 @-pos))
1184 (mail-extr-skip-whitespace-backward)
1185 (save-excursion
1186 (mail-extr-safe-move-sexp -1)
1187 (setq domain-pos (point))
1188 (mail-extr-skip-whitespace-backward)
1189 (setq \.-pos (eq ?. (preceding-char))))
1190 (when (and \.-pos
1191 ;; #### string consing
1192 (let ((s (intern-soft
1193 (buffer-substring domain-pos (point))
1194 mail-extr-all-top-level-domains)))
1195 (and s (get s 'domain-name))))
1196 (narrow-to-region (point-min) (point))
1197 (goto-char (car temp))
1198 (delete-char 1)
1199 (setq @-pos (point))
1200 (setcdr temp nil)
1201 (setq %-pos (delq @-pos %-pos))
1202 (insert "@")
1203 (throw 'truncated t))
1204 (setq temp (cdr temp)))))
d1782bd8
KH
1205 (setq mbox-beg (point-min)
1206 mbox-end (if %-pos (car %-pos)
1207 (or @-pos
d980c402
SM
1208 (point-max))))
1209
1210 (when @-pos
1211 ;; Make the domain-name part lowercase since it's case
1212 ;; insensitive anyway.
1213 (downcase-region (1+ @-pos) (point-max))))
d1782bd8
KH
1214
1215 ;; Done canonicalizing address.
1216 ;; We are now back in extraction-buffer.
1217
1218 ;; Decide what part of the address to search to find the full name.
1219 (cond (
1220 ;; Example: "First M. Last" <fml@foo.bar.dom>
1221 (and phrase-beg
1222 (eq quote-beg phrase-beg)
1223 (<= quote-end phrase-end))
1224 (narrow-to-region (1+ quote-beg) (1- quote-end))
1225 (mail-extr-undo-backslash-quoting (point-min) (point-max)))
1226
1227 ;; Example: First Last <fml@foo.bar.dom>
1228 (phrase-beg
1229 (narrow-to-region phrase-beg phrase-end))
1230
1231 ;; Example: fml@foo.bar.dom (First M. Last)
0e4bb7f0
GM
1232 (mailextr-cbeg
1233 (narrow-to-region (1+ mailextr-cbeg) (1- mailextr-cend))
d1782bd8
KH
1234 (mail-extr-undo-backslash-quoting (point-min) (point-max))
1235
1236 ;; Deal with spacing problems
1237 (goto-char (point-min))
1238;;; (cond ((not (search-forward " " nil t))
1239;;; (goto-char (point-min))
1240;;; (cond ((search-forward "_" nil t)
1241;;; ;; Handle the *idiotic* use of underlines as spaces.
1242;;; ;; Example: fml@foo.bar.dom (First_M._Last)
1243;;; (goto-char (point-min))
1244;;; (while (search-forward "_" nil t)
1245;;; (replace-match " " t)))
1246;;; ((search-forward "." nil t)
1247;;; ;; Fix . used as space
1248;;; ;; Example: danj1@cb.att.com (daniel.jacobson)
1249;;; (goto-char (point-min))
1250;;; (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1251;;; (replace-match "\\1 \\2" t))))))
154b3e39 1252 )
d1782bd8
KH
1253
1254 ;; Otherwise we try to get the name from the mailbox portion
1255 ;; of the address.
1256 ;; Example: First_M_Last@foo.bar.dom
72c0ae01 1257 (t
d1782bd8
KH
1258 ;; *** Work in canon buffer instead? No, can't. Hmm.
1259 (goto-char (point-max))
1260 (narrow-to-region (point) (point))
1261 (insert-buffer-substring canonicalization-buffer
1262 mbox-beg mbox-end)
1263 (goto-char (point-min))
1264
1265 ;; Example: First_Last.XXX@foo.bar.dom
1266 (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
1267
1268 (goto-char (point-min))
1269
1270 (if (not mail-extr-mangle-uucp)
1271 (modify-syntax-entry ?! "w" (syntax-table)))
1272
1273 (while (progn
1274 (mail-extr-skip-whitespace-forward)
1275 (not (eobp)))
1276 (setq char (char-after (point)))
154b3e39 1277 (cond
d1782bd8
KH
1278 ((eq char ?\")
1279 (setq quote-beg (point))
1280 (or (mail-extr-safe-move-sexp 1)
1281 ;; TODO: handle this error condition!!!!!
1282 (forward-char 1))
1283 ;; take into account deletions
1284 (setq quote-end (- (point) 2))
1285 (save-excursion
1286 (backward-char 1)
d980c402 1287 (delete-char 1)
d1782bd8
KH
1288 (goto-char quote-beg)
1289 (or (eobp)
d980c402 1290 (delete-char 1)))
d1782bd8
KH
1291 (mail-extr-undo-backslash-quoting quote-beg quote-end)
1292 (or (eq ?\ (char-after (point)))
1293 (insert " "))
1294 ;; (setq mailbox-name-processed-flag t)
1295 (setq \.-ends-name t))
1296 ((eq char ?.)
1297 (if (memq (char-after (1+ (point))) '(?_ ?=))
1298 (progn
1299 (forward-char 1)
d980c402 1300 (delete-char 1)
d1782bd8
KH
1301 (insert ?\ ))
1302 (if \.-ends-name
1303 (narrow-to-region (point-min) (point))
d980c402 1304 (delete-char 1)
d1782bd8
KH
1305 (insert " ")))
1306 ;; (setq mailbox-name-processed-flag t)
1307 )
1308 ((memq (char-syntax char) '(?. ?\\))
d980c402 1309 (delete-char 1)
d1782bd8
KH
1310 (insert " ")
1311 ;; (setq mailbox-name-processed-flag t)
154b3e39 1312 )
154b3e39 1313 (t
d1782bd8
KH
1314 (setq atom-beg (point))
1315 (forward-word 1)
1316 (setq atom-end (point))
1317 (goto-char atom-beg)
1318 (save-restriction
1319 (narrow-to-region atom-beg atom-end)
1320 (cond
1321
1322 ;; Handle X.400 addresses encoded in RFC-822.
1323 ;; *** Shit! This has to handle the case where it is
1324 ;; *** embedded in a quote too!
1325 ;; *** Shit! The input is being broken up into atoms
1326 ;; *** by periods!
1327 ((looking-at mail-extr-x400-encoded-address-pattern)
1328
1329 ;; Copy the contents of the individual fields that
1330 ;; might hold name data to the beginning.
d980c402
SM
1331 (mapc
1332 (lambda (field-pattern)
1333 (when
1334 (save-excursion
1335 (re-search-forward field-pattern nil t))
1336 (insert-buffer-substring (current-buffer)
1337 (match-beginning 1)
1338 (match-end 1))
1339 (insert " ")))
d1782bd8
KH
1340 (list mail-extr-x400-encoded-address-given-name-pattern
1341 mail-extr-x400-encoded-address-surname-pattern
1342 mail-extr-x400-encoded-address-full-name-pattern))
1343
1344 ;; Discard the rest, since it contains stuff like
1345 ;; routing information, not part of a name.
1346 (mail-extr-skip-whitespace-backward)
1347 (delete-region (point) (point-max))
1348
1349 ;; Handle periods used for spacing.
1350 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1351 (replace-match "\\1 \\2" t))
1352
1353 ;; (setq mailbox-name-processed-flag t)
1354 )
1355
1356 ;; Handle normal addresses.
1357 (t
1358 (goto-char (point-min))
1359 ;; Handle _ and = used for spacing.
1360 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
1361 (replace-match "\\1 " t)
1362 ;; (setq mailbox-name-processed-flag t)
1363 )
1364 (goto-char (point-max))))))))
1365
1366 ;; undo the dirty deed
1367 (if (not mail-extr-mangle-uucp)
1368 (modify-syntax-entry ?! "." (syntax-table)))
1369 ;;
1370 ;; If we derived the name from the mailbox part of the address,
1371 ;; and we only got one word out of it, don't treat that as a
1372 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
1373 ;; (if (not mailbox-name-processed-flag)
1374 ;; (delete-region (point-min) (point-max)))
1375 ))
1376
1377 (set-syntax-table mail-extr-address-text-syntax-table)
1378
1379 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
1380 (goto-char (point-min))
1381
1382 ;; If name is "First Last" and userid is "F?L", then assume
1383 ;; the middle initial is the second letter in the userid.
1384 ;; Initial code by Jamie Zawinski <jwz@lucid.com>
1385 ;; *** Make it work when there's a suffix as well.
1386 (goto-char (point-min))
d980c402
SM
1387 (when (and mail-extr-guess-middle-initial
1388 (not disable-initial-guessing-flag)
1389 (eq 3 (- mbox-end mbox-beg))
1390 (progn
1391 (goto-char (point-min))
1392 (looking-at mail-extr-two-name-pattern)))
1393 (setq fi (char-after (match-beginning 0))
1394 li (char-after (match-beginning 3)))
1395 (with-current-buffer canonicalization-buffer
1396 ;; char-equal is ignoring case here, so no need to upcase
1397 ;; or downcase.
1398 (let ((case-fold-search t))
1399 (and (char-equal fi (char-after mbox-beg))
1400 (char-equal li (char-after (1- mbox-end)))
1401 (setq mi (char-after (1+ mbox-beg))))))
1402 (when (and mi
1403 ;; TODO: use better table than syntax table
1404 (eq ?w (char-syntax mi)))
1405 (goto-char (match-beginning 3))
1406 (insert (upcase mi) ". ")))
d1782bd8
KH
1407
1408 ;; Nuke name if it is the same as mailbox name.
1409 (let ((buffer-length (- (point-max) (point-min)))
1410 (i 0)
1411 (names-match-flag t))
d980c402
SM
1412 (when (and (> buffer-length 0)
1413 (eq buffer-length (- mbox-end mbox-beg)))
1414 (goto-char (point-max))
1415 (insert-buffer-substring canonicalization-buffer
1416 mbox-beg mbox-end)
1417 (while (and names-match-flag
1418 (< i buffer-length))
1419 (or (eq (downcase (char-after (+ i (point-min))))
1420 (downcase
1421 (char-after (+ i buffer-length (point-min)))))
1422 (setq names-match-flag nil))
1423 (setq i (1+ i)))
1424 (delete-region (+ (point-min) buffer-length) (point-max))
bf247b6e 1425 (and names-match-flag
67e61e16
EZ
1426 mail-extr-ignore-realname-equals-mailbox-name
1427 (narrow-to-region (point) (point)))))
d1782bd8
KH
1428
1429 ;; Nuke name if it's just one word.
1430 (goto-char (point-min))
1431 (and mail-extr-ignore-single-names
1432 (not (re-search-forward "[- ]" nil t))
1433 (narrow-to-region (point) (point)))
1434
1435 ;; Record the result
1436 (setq value-list
1437 (cons (list (if (not (= (point-min) (point-max)))
1438 (buffer-string))
d980c402 1439 (with-current-buffer canonicalization-buffer
d1782bd8
KH
1440 (if (not (= (point-min) (point-max)))
1441 (buffer-string))))
1442 value-list))
1443
1444 ;; Unless one address is all we wanted,
1445 ;; delete this one from extraction-buffer
1446 ;; and get ready to extract the next address.
1447 (when all
1448 (if end-of-address
1449 (narrow-to-region 1 end-of-address)
1450 (widen))
1451 (delete-region (point-min) (point-max))
1452 (widen))
1453 )))
1454 (if all (nreverse value-list) (car value-list))
1455 ))
72c0ae01 1456
2d8a5449 1457(defcustom mail-extr-disable-voodoo "\\cj"
fb7ada5f 1458 "If it is a regexp, names matching it will never be modified.
2d8a5449
KH
1459If it is neither nil nor a string, modifying of names will never take
1460place. It affects how `mail-extract-address-components' works."
1461 :type '(choice (regexp :size 0)
1462 (const :tag "Always enabled" nil)
1463 (const :tag "Always disabled" t))
1464 :group 'mail-extr)
1465
154b3e39 1466(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
2d8a5449
KH
1467 (unless (and mail-extr-disable-voodoo
1468 (or (not (stringp mail-extr-disable-voodoo))
1469 (progn
1470 (goto-char (point-min))
1471 (re-search-forward mail-extr-disable-voodoo nil t))))
1472 (let ((word-count 0)
1473 (case-fold-search nil)
1474 mixed-case-flag lower-case-flag ;;upper-case-flag
1475 suffix-flag last-name-comma-flag
2d8a5449
KH
1476 initial
1477 begin-again-flag
1478 drop-this-word-if-trailing-flag
1479 drop-last-word-if-trailing-flag
1480 word-found-flag
1481 this-word-beg last-word-beg
1482 name-beg name-end
1483 name-done-flag
1484 )
1485 (save-excursion
1486 (set-syntax-table mail-extr-address-text-syntax-table)
1487
1488 ;; Get rid of comments.
d980c402 1489 (goto-char (point-min))
2d8a5449
KH
1490 (while (not (eobp))
1491 ;; Initialize for this iteration of the loop.
1492 (skip-chars-forward "^({[\"'`")
1493 (let ((cbeg (point)))
1494 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1495 (if (memq (following-char) '(?\' ?\`))
1496 (search-forward "'" nil 'move
1497 (if (eq ?\' (following-char)) 2 1))
1498 (or (mail-extr-safe-move-sexp 1)
1499 (goto-char (point-max))))
1500 (set-syntax-table mail-extr-address-text-syntax-table)
1501 (when (eq (char-after cbeg) ?\()
1502 ;; Delete the comment itself.
1503 (delete-region cbeg (point))
1504 ;; Canonicalize whitespace where the comment was.
1505 (skip-chars-backward " \t")
1506 (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
1507 (replace-match "")
1508 (setq cbeg (point))
1509 (skip-chars-forward " \t")
1510 (if (bobp)
1511 (delete-region (point) cbeg)
1512 (just-one-space))))))
1513
1514 ;; This was moved above.
1515 ;; Fix . used as space
1516 ;; But it belongs here because it occurs not only as
1517 ;; rypens@reks.uia.ac.be (Piet.Rypens)
1518 ;; but also as
1519 ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
1520 ;;(goto-char (point-min))
1521 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
1522 ;; (replace-match "\\1 \\2" t))
1523
1524 (unless (search-forward " " nil t)
1525 (goto-char (point-min))
1526 (cond ((search-forward "_" nil t)
1527 ;; Handle the *idiotic* use of underlines as spaces.
1528 ;; Example: fml@foo.bar.dom (First_M._Last)
1529 (goto-char (point-min))
1530 (while (search-forward "_" nil t)
1531 (replace-match " " t)))
1532 ((search-forward "." nil t)
1533 ;; Fix . used as space
1534 ;; Example: danj1@cb.att.com (daniel.jacobson)
1535 (goto-char (point-min))
1536 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1537 (replace-match "\\1 \\2" t)))))
6c83d99f 1538
2d8a5449
KH
1539 ;; Loop over the words (and other junk) in the name.
1540 (goto-char (point-min))
1541 (while (not name-done-flag)
1542
1543 (when word-found-flag
1544 ;; Last time through this loop we skipped over a word.
1545 (setq last-word-beg this-word-beg)
1546 (setq drop-last-word-if-trailing-flag
1547 drop-this-word-if-trailing-flag)
1548 (setq word-found-flag nil))
1549
1550 (when begin-again-flag
1551 ;; Last time through the loop we found something that
1552 ;; indicates we should pretend we are beginning again from
1553 ;; the start.
1554 (setq word-count 0)
1555 (setq last-word-beg nil)
1556 (setq drop-last-word-if-trailing-flag nil)
1557 (setq mixed-case-flag nil)
1558 (setq lower-case-flag nil)
1559 ;; (setq upper-case-flag nil)
1560 (setq begin-again-flag nil))
1561
1562 ;; Initialize for this iteration of the loop.
154b3e39 1563 (mail-extr-skip-whitespace-forward)
2d8a5449
KH
1564 (if (eq word-count 0) (narrow-to-region (point) (point-max)))
1565 (setq this-word-beg (point))
1566 (setq drop-this-word-if-trailing-flag nil)
1567
1568 ;; Decide what to do based on what we are looking at.
72c0ae01 1569 (cond
2d8a5449
KH
1570
1571 ;; Delete title
72c0ae01 1572 ((and (eq word-count 0)
2d8a5449
KH
1573 (looking-at mail-extr-full-name-prefixes))
1574 (goto-char (match-end 0))
1575 (narrow-to-region (point) (point-max)))
6c83d99f 1576
2d8a5449
KH
1577 ;; Stop after name suffix
1578 ((and (>= word-count 2)
1579 (looking-at mail-extr-full-name-suffix-pattern))
1580 (mail-extr-skip-whitespace-backward)
1581 (setq suffix-flag (point))
1582 (if (eq ?, (following-char))
1583 (forward-char 1)
1584 (insert ?,))
1585 ;; Enforce at least one space after comma
1586 (or (eq ?\ (following-char))
1587 (insert ?\ ))
1588 (mail-extr-skip-whitespace-forward)
1589 (cond ((memq (following-char) '(?j ?J ?s ?S))
1590 (capitalize-word 1)
1591 (if (eq (following-char) ?.)
1592 (forward-char 1)
1593 (insert ?.)))
1594 (t
1595 (upcase-word 1)))
1596 (setq word-found-flag t)
1597 (setq name-done-flag t))
1598
1599 ;; Handle SCA names
1600 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
1601 (goto-char (match-beginning 1))
1602 (narrow-to-region (point) (point-max))
1603 (setq begin-again-flag t))
1604
1605 ;; Check for initial last name followed by comma
1606 ((and (eq ?, (following-char))
1607 (eq word-count 1))
1608 (forward-char 1)
1609 (setq last-name-comma-flag t)
1610 (or (eq ?\ (following-char))
1611 (insert ?\ )))
1612
1613 ;; Stop before trailing comma-separated comment
1614 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1615 ;; *** This case is redundant???
1616 ;;((eq ?, (following-char))
1617 ;; (setq name-done-flag t))
1618
1619 ;; Delete parenthesized/quoted comment/nickname
1620 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
0e4bb7f0 1621 (setq mailextr-cbeg (point))
2d8a5449
KH
1622 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1623 (cond ((memq (following-char) '(?\' ?\`))
1624 (or (search-forward "'" nil t
1625 (if (eq ?\' (following-char)) 2 1))
1626 (delete-char 1)))
1627 (t
1628 (or (mail-extr-safe-move-sexp 1)
1629 (goto-char (point-max)))))
1630 (set-syntax-table mail-extr-address-text-syntax-table)
0e4bb7f0 1631 (setq mailextr-cend (point))
2d8a5449
KH
1632 (cond
1633 ;; Handle case of entire name being quoted
1634 ((and (eq word-count 0)
1635 (looking-at " *\\'")
0e4bb7f0
GM
1636 (>= (- mailextr-cend mailextr-cbeg) 2))
1637 (narrow-to-region (1+ mailextr-cbeg) (1- mailextr-cend))
2d8a5449
KH
1638 (goto-char (point-min)))
1639 (t
1640 ;; Handle case of quoted initial
0e4bb7f0
GM
1641 (if (and (or (= 3 (- mailextr-cend mailextr-cbeg))
1642 (and (= 4 (- mailextr-cend mailextr-cbeg))
1643 (eq ?. (char-after (+ 2 mailextr-cbeg)))))
2d8a5449 1644 (not (looking-at " *\\'")))
0e4bb7f0 1645 (setq initial (char-after (1+ mailextr-cbeg)))
2d8a5449 1646 (setq initial nil))
0e4bb7f0 1647 (delete-region mailextr-cbeg mailextr-cend)
2d8a5449
KH
1648 (if initial
1649 (insert initial ". ")))))
1650
1651 ;; Handle *Stupid* VMS date stamps
1652 ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
1653 (replace-match "" t))
1654
1655 ;; Handle Chinese characters.
1656 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
1657 (goto-char (match-end 0))
1658 (setq word-found-flag t))
1659
1660 ;; Skip initial garbage characters.
1661 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1662 ((and (eq word-count 0)
1663 (looking-at mail-extr-leading-garbage))
1664 (goto-char (match-end 0))
1665 ;; *** Skip backward over these???
1666 ;; (skip-chars-backward "& \"")
1667 (narrow-to-region (point) (point-max)))
1668
1669 ;; Various stopping points
1670 ((or
1671
1672 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
1673 ;; words. Example: XT-DEM.
1674 (and (>= word-count 2)
1675 mixed-case-flag
1676 (looking-at mail-extr-weird-acronym-pattern)
1677 (not (looking-at mail-extr-roman-numeral-pattern)))
1678
1679 ;; Stop before trailing alternative address
1680 (looking-at mail-extr-alternative-address-pattern)
1681
1682 ;; Stop before trailing comment not introduced by comma
1683 ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
1684 (looking-at mail-extr-trailing-comment-start-pattern)
1685
1686 ;; Stop before telephone numbers
1687 (and (>= word-count 1)
1688 (looking-at mail-extr-telephone-extension-pattern)))
1689 (setq name-done-flag t))
1690
1691 ;; Delete ham radio call signs
1692 ((looking-at mail-extr-ham-call-sign-pattern)
1693 (delete-region (match-beginning 0) (match-end 0)))
1694
1695 ;; Fixup initials
1696 ((looking-at mail-extr-initial-pattern)
1697 (or (eq (following-char) (upcase (following-char)))
72c0ae01 1698 (setq lower-case-flag t))
2d8a5449
KH
1699 (forward-char 1)
1700 (if (eq ?. (following-char))
1701 (forward-char 1)
1702 (insert ?.))
1703 (or (eq ?\ (following-char))
1704 (insert ?\ ))
1705 (setq word-found-flag t))
1706
1707 ;; Handle BITNET LISTSERV list names.
1708 ((and (eq word-count 0)
1709 (looking-at mail-extr-listserv-list-name-pattern))
1710 (narrow-to-region (match-beginning 1) (match-end 1))
1711 (setq word-found-flag t)
1712 (setq name-done-flag t))
1713
1714 ;; Handle & substitution, when & is last and is not first.
1715 ((and (> word-count 0)
1716 (eq ?\ (preceding-char))
1717 (eq (following-char) ?&)
1718 (eq (1+ (point)) (point-max)))
1719 (delete-char 1)
1720 (capitalize-region
1721 (point)
1722 (progn
1723 (insert-buffer-substring canonicalization-buffer
1724 mbox-beg mbox-end)
1725 (point)))
1726 (setq disable-initial-guessing-flag t)
1727 (setq word-found-flag t))
1728
1729 ;; Handle & between names, as in "Bob & Susie".
1730 ((and (> word-count 0) (eq (following-char) ?\&))
1731 (setq name-beg (point))
1732 (setq name-end (1+ name-beg))
1733 (setq word-found-flag t)
1734 (goto-char name-end))
1735
1736 ;; Regular name words
1737 ((looking-at mail-extr-name-pattern)
1738 (setq name-beg (point))
1739 (setq name-end (match-end 0))
1740
1741 ;; Certain words will be dropped if they are at the end.
1742 (and (>= word-count 2)
1743 (not lower-case-flag)
1744 (or
1745 ;; Trailing 4-or-more letter lowercase words preceded by
1746 ;; mixed case or uppercase words will be dropped.
1747 (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'")
1748 ;; Drop a trailing word which is terminated with a period.
1749 (eq ?. (char-after (1- name-end))))
1750 (setq drop-this-word-if-trailing-flag t))
1751
1752 ;; Set the flags that indicate whether we have seen a lowercase
1753 ;; word, a mixed case word, and an uppercase word.
1754 (if (re-search-forward "[[:lower:]]" name-end t)
1755 (if (progn
1756 (goto-char name-beg)
1757 (re-search-forward "[[:upper:]]" name-end t))
1758 (setq mixed-case-flag t)
1759 (setq lower-case-flag t))
1760 ;; (setq upper-case-flag t)
1761 )
6c83d99f 1762
2d8a5449
KH
1763 (goto-char name-end)
1764 (setq word-found-flag t))
72c0ae01 1765
2d8a5449
KH
1766 ;; Allow a number as a word, if it doesn't mean anything else.
1767 ((looking-at "[0-9]+\\>")
1768 (setq name-beg (point))
1769 (setq name-end (match-end 0))
1770 (goto-char name-end)
1771 (setq word-found-flag t))
1772
1773 (t
1774 (setq name-done-flag t)
1775 ))
1776
1777 ;; Count any word that we skipped over.
1778 (if word-found-flag
1779 (setq word-count (1+ word-count))))
1780
1781 ;; If the last thing in the name is 2 or more periods, or one or more
1782 ;; other sentence terminators (but not a single period) then keep them
1783 ;; and the preceding word. This is for the benefit of whole sentences
1784 ;; in the name field: it's better behavior than dropping the last word
1785 ;; of the sentence...
1786 (if (and (not suffix-flag)
1787 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
1788 (goto-char (setq suffix-flag (point-max))))
1789
1790 ;; Drop everything after point and certain trailing words.
1791 (narrow-to-region (point-min)
1792 (or (and drop-last-word-if-trailing-flag
1793 last-word-beg)
1794 (point)))
1795
1796 ;; Xerox's mailers SUCK!!!!!!
1797 ;; We simply refuse to believe that any last name is PARC or ADOC.
1798 ;; If it looks like that is the last name, that there is no meaningful
1799 ;; here at all. Actually I guess it would be best to map patterns
1800 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
1801 ;; actually know that that is what's going on.
1802 (unless suffix-flag
1803 (goto-char (point-min))
1804 (let ((case-fold-search t))
1805 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
1806 (erase-buffer))))
1807
1808 ;; If last name first put it at end (but before suffix)
1809 (when last-name-comma-flag
1810 (goto-char (point-min))
1811 (search-forward ",")
1812 (setq name-end (1- (point)))
1813 (goto-char (or suffix-flag (point-max)))
1814 (or (eq ?\ (preceding-char))
1815 (insert ?\ ))
1816 (insert-buffer-substring (current-buffer) (point-min) name-end)
af604656 1817 (goto-char name-end)
2d8a5449
KH
1818 (skip-chars-forward "\t ,")
1819 (narrow-to-region (point) (point-max)))
154b3e39 1820
2d8a5449
KH
1821 ;; Delete leading and trailing junk characters.
1822 ;; *** This is probably completely unneeded now.
1823 ;;(goto-char (point-max))
1824 ;;(skip-chars-backward mail-extr-non-end-name-chars)
1825 ;;(if (eq ?. (following-char))
1826 ;; (forward-char 1))
1827 ;;(narrow-to-region (point)
1828 ;; (progn
1829 ;; (goto-char (point-min))
1830 ;; (skip-chars-forward mail-extr-non-begin-name-chars)
1831 ;; (point)))
1832
1833 ;; Compress whitespace
d980c402 1834 (goto-char (point-min))
2d8a5449
KH
1835 (while (re-search-forward "[ \t\n]+" nil t)
1836 (replace-match (if (eobp) "" " ") t))
1837 ))))
72c0ae01 1838
154b3e39 1839\f
72c0ae01 1840
154b3e39
RS
1841;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1842;;
1843;; Table of top-level domain names.
1844;;
1845;; This is used during address canonicalization; be careful of format changes.
72c0ae01
ER
1846;; Keep in mind that the country abbreviations follow ISO-3166. There is
1847;; a U.S. FIPS that specifies a different set of two-letter country
1848;; abbreviations.
97f83272
SJ
1849;;
1850;; Updated by the RIPE Network Coordination Centre.
1851;;
1852;; Source: ISO 3166 Maintenance Agency
1853;; http://www.iso.org/iso/en/prods-services/iso3166ma/02iso-3166-code-lists/list-en1-semic.txt
1854;; http://www.iana.org/domain-names.htm
1855;; http://www.iana.org/cctld/cctld-whois.htm
78a4e22e 1856;; Latest change: 2007/11/15
72c0ae01 1857
154b3e39 1858(defconst mail-extr-all-top-level-domains
0b725d8c 1859 (let ((ob (make-vector 739 0)))
d980c402
SM
1860 (mapc
1861 (lambda (x)
1862 (put (intern (downcase (car x)) ob)
1863 'domain-name
1864 (if (nth 2 x)
1865 (format (nth 2 x) (nth 1 x))
1866 (nth 1 x))))
e1a34b58
RS
1867 '(
1868 ;; ISO 3166 codes:
78a4e22e 1869 ("ac" "Ascension Island")
44248b7f 1870 ("ad" "Andorra")
e1a34b58 1871 ("ae" "United Arab Emirates")
0b725d8c 1872 ("af" "Afghanistan")
e1a34b58 1873 ("ag" "Antigua and Barbuda")
0b725d8c 1874 ("ai" "Anguilla")
e1a34b58 1875 ("al" "Albania")
44248b7f 1876 ("am" "Armenia")
0b725d8c 1877 ("an" "Netherlands Antilles")
e1a34b58
RS
1878 ("ao" "Angola")
1879 ("aq" "Antarctica") ; continent
154b3e39 1880 ("ar" "Argentina" "Argentine Republic")
0b725d8c 1881 ("as" "American Samoa")
154b3e39
RS
1882 ("at" "Austria" "The Republic of %s")
1883 ("au" "Australia")
0b725d8c 1884 ("aw" "Aruba")
78a4e22e 1885 ("ax" "Aland Islands")
e1a34b58 1886 ("az" "Azerbaijan")
44248b7f 1887 ("ba" "Bosnia-Herzegovina")
154b3e39 1888 ("bb" "Barbados")
e1a34b58 1889 ("bd" "Bangladesh")
154b3e39 1890 ("be" "Belgium" "The Kingdom of %s")
e1a34b58 1891 ("bf" "Burkina Faso")
154b3e39 1892 ("bg" "Bulgaria")
e1a34b58 1893 ("bh" "Bahrain")
0b725d8c
GM
1894 ("bi" "Burundi")
1895 ("bj" "Benin")
78a4e22e 1896 ("bl" "Saint Barthelemy")
e1a34b58 1897 ("bm" "Bermuda")
0b725d8c 1898 ("bn" "Brunei Darussalam")
154b3e39
RS
1899 ("bo" "Bolivia" "Republic of %s")
1900 ("br" "Brazil" "The Federative Republic of %s")
1901 ("bs" "Bahamas")
0b725d8c
GM
1902 ("bt" "Bhutan")
1903 ("bv" "Bouvet Island")
e1a34b58
RS
1904 ("bw" "Botswana")
1905 ("by" "Belarus")
154b3e39
RS
1906 ("bz" "Belize")
1907 ("ca" "Canada")
0b725d8c 1908 ("cc" "Cocos (Keeling) Islands")
97f83272 1909 ("cd" "Congo" "The Democratic Republic of the %s")
0b725d8c 1910 ("cf" "Central African Republic")
e1a34b58 1911 ("cg" "Congo")
154b3e39 1912 ("ch" "Switzerland" "The Swiss Confederation")
0b725d8c
GM
1913 ("ci" "Ivory Coast") ; Cote D'ivoire
1914 ("ck" "Cook Islands")
154b3e39 1915 ("cl" "Chile" "The Republic of %s")
e1a34b58 1916 ("cm" "Cameroon") ; In .fr domain
154b3e39 1917 ("cn" "China" "The People's Republic of %s")
e1a34b58 1918 ("co" "Colombia")
154b3e39 1919 ("cr" "Costa Rica" "The Republic of %s")
e1a34b58 1920 ("cu" "Cuba")
0b725d8c
GM
1921 ("cv" "Cape Verde")
1922 ("cx" "Christmas Island")
e1a34b58
RS
1923 ("cy" "Cyprus")
1924 ("cz" "Czech Republic")
154b3e39 1925 ("de" "Germany")
0b725d8c 1926 ("dj" "Djibouti")
154b3e39
RS
1927 ("dk" "Denmark")
1928 ("dm" "Dominica")
1929 ("do" "Dominican Republic" "The %s")
e1a34b58 1930 ("dz" "Algeria")
154b3e39 1931 ("ec" "Ecuador" "The Republic of %s")
e1a34b58 1932 ("ee" "Estonia")
154b3e39 1933 ("eg" "Egypt" "The Arab Republic of %s")
0b725d8c 1934 ("eh" "Western Sahara")
e1a34b58 1935 ("er" "Eritrea")
154b3e39 1936 ("es" "Spain" "The Kingdom of %s")
44248b7f 1937 ("et" "Ethiopia")
78a4e22e 1938 ("eu" "European Union")
154b3e39 1939 ("fi" "Finland" "The Republic of %s")
0b725d8c
GM
1940 ("fj" "Fiji")
1941 ("fk" "Falkland Islands (Malvinas)")
1942 ("fm" "Micronesia" "Federated States of %s")
e1a34b58 1943 ("fo" "Faroe Islands")
154b3e39 1944 ("fr" "France")
44248b7f
KH
1945 ("ga" "Gabon")
1946 ("gb" "United Kingdom")
e1a34b58
RS
1947 ("gd" "Grenada")
1948 ("ge" "Georgia")
0b725d8c 1949 ("gf" "French Guiana")
78a4e22e 1950 ("gg" "Guernsey")
0b725d8c
GM
1951 ("gh" "Ghana")
1952 ("gi" "Gibraltar")
44248b7f
KH
1953 ("gl" "Greenland")
1954 ("gm" "Gambia")
0b725d8c 1955 ("gn" "Guinea")
e1a34b58 1956 ("gp" "Guadeloupe (Fr.)")
0b725d8c 1957 ("gq" "Equatorial Guinea")
154b3e39 1958 ("gr" "Greece" "The Hellenic Republic (%s)")
97f83272 1959 ("gs" "South Georgia and The South Sandwich Islands")
e1a34b58
RS
1960 ("gt" "Guatemala")
1961 ("gu" "Guam (U.S.)")
0b725d8c
GM
1962 ("gw" "Guinea-Bissau")
1963 ("gy" "Guyana")
154b3e39 1964 ("hk" "Hong Kong")
09e80d9f 1965 ("hm" "Heard Island and McDonald Islands")
e1a34b58 1966 ("hn" "Honduras")
44248b7f 1967 ("hr" "Croatia" "Croatia (Hrvatska)")
e1a34b58 1968 ("ht" "Haiti")
44248b7f 1969 ("hu" "Hungary" "The Hungarian Republic")
e1a34b58 1970 ("id" "Indonesia")
154b3e39
RS
1971 ("ie" "Ireland")
1972 ("il" "Israel" "The State of %s")
0b725d8c 1973 ("im" "Isle of Man" "The %s") ; NOT in ISO 3166-1 of 2001-02-26
154b3e39 1974 ("in" "India" "The Republic of %s")
0b725d8c
GM
1975 ("io" "British Indian Ocean Territory")
1976 ("iq" "Iraq")
1977 ("ir" "Iran" "Islamic Republic of %s")
154b3e39
RS
1978 ("is" "Iceland" "The Republic of %s")
1979 ("it" "Italy" "The Italian Republic")
78a4e22e 1980 ("je" "Jersey")
154b3e39 1981 ("jm" "Jamaica")
44248b7f 1982 ("jo" "Jordan")
154b3e39 1983 ("jp" "Japan")
e1a34b58 1984 ("ke" "Kenya")
0b725d8c
GM
1985 ("kg" "Kyrgyzstan")
1986 ("kh" "Cambodia")
1987 ("ki" "Kiribati")
1988 ("km" "Comoros")
1989 ("kn" "Saint Kitts and Nevis")
1990 ("kp" "Korea (North)" "Democratic People's Republic of Korea")
1991 ("kr" "Korea (South)" "Republic of Korea")
e1a34b58 1992 ("kw" "Kuwait")
0b725d8c 1993 ("ky" "Cayman Islands")
97f83272 1994 ("kz" "Kazakhstan")
0b725d8c 1995 ("la" "Lao People's Democratic Republic")
e1a34b58 1996 ("lb" "Lebanon")
0b725d8c 1997 ("lc" "Saint Lucia")
e1a34b58 1998 ("li" "Liechtenstein")
154b3e39 1999 ("lk" "Sri Lanka" "The Democratic Socialist Republic of %s")
0b725d8c 2000 ("lr" "Liberia")
e1a34b58
RS
2001 ("ls" "Lesotho")
2002 ("lt" "Lithuania")
2003 ("lu" "Luxembourg")
2004 ("lv" "Latvia")
0b725d8c 2005 ("ly" "Libyan Arab Jamahiriya")
e1a34b58 2006 ("ma" "Morocco")
44248b7f
KH
2007 ("mc" "Monaco")
2008 ("md" "Moldova" "The Republic of %s")
78a4e22e
GM
2009 ("me" "Montenegro")
2010 ("mf" "Saint Martin (French part)")
e1a34b58 2011 ("mg" "Madagascar")
0b725d8c
GM
2012 ("mh" "Marshall Islands")
2013 ("mk" "Macedonia" "The Former Yugoslav Republic of %s")
e1a34b58 2014 ("ml" "Mali")
0b725d8c
GM
2015 ("mm" "Myanmar")
2016 ("mn" "Mongolia")
97f83272 2017 ("mo" "Macao")
0b725d8c
GM
2018 ("mp" "Northern Mariana Islands")
2019 ("mq" "Martinique")
2020 ("mr" "Mauritania")
2021 ("ms" "Montserrat")
e1a34b58
RS
2022 ("mt" "Malta")
2023 ("mu" "Mauritius")
44248b7f 2024 ("mv" "Maldives")
e1a34b58 2025 ("mw" "Malawi")
154b3e39 2026 ("mx" "Mexico" "The United Mexican States")
97f83272 2027 ("my" "Malaysia")
e1a34b58 2028 ("mz" "Mozambique")
154b3e39 2029 ("na" "Namibia")
e1a34b58
RS
2030 ("nc" "New Caledonia (Fr.)")
2031 ("ne" "Niger") ; In .fr domain
0b725d8c
GM
2032 ("nf" "Norfolk Island")
2033 ("ng" "Nigeria")
154b3e39
RS
2034 ("ni" "Nicaragua" "The Republic of %s")
2035 ("nl" "Netherlands" "The Kingdom of the %s")
2036 ("no" "Norway" "The Kingdom of %s")
e1a34b58 2037 ("np" "Nepal") ; Via .in domain
0b725d8c 2038 ("nr" "Nauru")
44248b7f 2039 ("nu" "Niue")
154b3e39 2040 ("nz" "New Zealand")
0b725d8c 2041 ("om" "Oman")
e1a34b58 2042 ("pa" "Panama")
154b3e39 2043 ("pe" "Peru")
97f83272 2044 ("pf" "French Polynesia")
154b3e39
RS
2045 ("pg" "Papua New Guinea")
2046 ("ph" "Philippines" "The Republic of the %s")
e1a34b58 2047 ("pk" "Pakistan")
154b3e39 2048 ("pl" "Poland")
0b725d8c
GM
2049 ("pm" "Saint Pierre and Miquelon")
2050 ("pn" "Pitcairn")
e1a34b58 2051 ("pr" "Puerto Rico (U.S.)")
0b725d8c 2052 ("ps" "Palestinian Territory, Occupied")
4802af76 2053 ("pt" "Portugal" "The Portuguese Republic")
0b725d8c 2054 ("pw" "Palau")
154b3e39 2055 ("py" "Paraguay")
44248b7f 2056 ("qa" "Qatar")
e1a34b58
RS
2057 ("re" "Reunion (Fr.)") ; In .fr domain
2058 ("ro" "Romania")
78a4e22e 2059 ("rs" "Serbia")
0b725d8c
GM
2060 ("ru" "Russia" "Russian Federation")
2061 ("rw" "Rwanda")
e1a34b58 2062 ("sa" "Saudi Arabia")
0b725d8c 2063 ("sb" "Solomon Islands")
e1a34b58
RS
2064 ("sc" "Seychelles")
2065 ("sd" "Sudan")
154b3e39
RS
2066 ("se" "Sweden" "The Kingdom of %s")
2067 ("sg" "Singapore" "The Republic of %s")
0b725d8c 2068 ("sh" "Saint Helena")
e1a34b58 2069 ("si" "Slovenia")
0b725d8c 2070 ("sj" "Svalbard and Jan Mayen") ; In .no domain
e1a34b58 2071 ("sk" "Slovakia" "The Slovak Republic")
0b725d8c 2072 ("sl" "Sierra Leone")
44248b7f 2073 ("sm" "San Marino")
e1a34b58 2074 ("sn" "Senegal")
0b725d8c 2075 ("so" "Somalia")
154b3e39 2076 ("sr" "Suriname")
97f83272 2077 ("st" "Sao Tome and Principe")
0b725d8c
GM
2078 ("su" "U.S.S.R." "The Union of Soviet Socialist Republics")
2079 ("sv" "El Salvador")
2080 ("sy" "Syrian Arab Republic")
e1a34b58 2081 ("sz" "Swaziland")
97f83272 2082 ("tc" "Turks and Caicos Islands")
0b725d8c
GM
2083 ("td" "Chad")
2084 ("tf" "French Southern Territories")
e1a34b58 2085 ("tg" "Togo")
154b3e39 2086 ("th" "Thailand" "The Kingdom of %s")
0b725d8c
GM
2087 ("tj" "Tajikistan")
2088 ("tk" "Tokelau")
97f83272 2089 ("tl" "East Timor")
0b725d8c 2090 ("tm" "Turkmenistan")
154b3e39 2091 ("tn" "Tunisia")
44248b7f 2092 ("to" "Tonga")
0b725d8c 2093 ("tp" "East Timor")
154b3e39
RS
2094 ("tr" "Turkey" "The Republic of %s")
2095 ("tt" "Trinidad and Tobago")
0b725d8c
GM
2096 ("tv" "Tuvalu")
2097 ("tw" "Taiwan" "%s, Province of China")
2098 ("tz" "Tanzania" "United Republic of %s")
e1a34b58 2099 ("ua" "Ukraine")
0b725d8c 2100 ("ug" "Uganda")
44a1338a 2101 ("uk" "United Kingdom" "The %s of Great Britain and Northern Ireland")
0b725d8c 2102 ("um" "United States Minor Outlying Islands")
e1a34b58 2103 ("us" "United States" "The %s of America")
154b3e39 2104 ("uy" "Uruguay" "The Eastern Republic of %s")
0b725d8c
GM
2105 ("uz" "Uzbekistan")
2106 ("va" "Holy See (Vatican City State)")
97f83272 2107 ("vc" "Saint Vincent and the Grenadines")
154b3e39 2108 ("ve" "Venezuela" "The Republic of %s")
0b725d8c
GM
2109 ("vg" "Virgin Islands, British")
2110 ("vi" "Virgin Islands, U.S.")
e1a34b58
RS
2111 ("vn" "Vietnam")
2112 ("vu" "Vanuatu")
0b725d8c
GM
2113 ("wf" "Wallis and Futuna")
2114 ("ws" "Samoa")
2115 ("ye" "Yemen")
2116 ("yt" "Mayotte")
44248b7f
KH
2117 ("yu" "Yugoslavia" "Yugoslavia, AKA Serbia-Montenegro")
2118 ("za" "South Africa" "The Republic of %s")
0b725d8c 2119 ("zm" "Zambia")
154b3e39 2120 ("zw" "Zimbabwe" "Republic of %s")
97f83272
SJ
2121 ;; Generic Domains:
2122 ("aero" t "Air Transport Industry")
78a4e22e 2123 ("asia" t "Pan-Asia and Asia Pacific community")
97f83272 2124 ("biz" t "Businesses")
78a4e22e 2125 ("cat" t "Catalan language and culture")
e1a34b58 2126 ("com" t "Commercial")
97f83272
SJ
2127 ("coop" t "Cooperative Associations")
2128 ("info" t "Info")
78a4e22e
GM
2129 ("jobs" t "Employment")
2130 ("mobi" t "Mobile products")
97f83272
SJ
2131 ("museum" t "Museums")
2132 ("name" t "Individuals")
e1a34b58
RS
2133 ("net" t "Network")
2134 ("org" t "Non-profit Organization")
78a4e22e
GM
2135 ("pro" t "Credentialed professionals")
2136 ("tel" t "Contact data")
2137 ("travel" t "Travel industry")
97f83272
SJ
2138 ;;("bitnet" t "Because It's Time NET")
2139 ("gov" t "United States Government")
2140 ("edu" t "Educational")
2141 ("mil" t "United States Military")
2142 ("int" t "International Treaties")
2143 ;;("nato" t "North Atlantic Treaty Organization")
e1a34b58 2144 ("uucp" t "Unix to Unix CoPy")
97f83272
SJ
2145 ;; Infrastructure Domains:
2146 ("arpa" t "Advanced Research Projects Agency (U.S. DoD)")
154b3e39
RS
2147 ))
2148 ob))
2149
2150;;;###autoload
2151(defun what-domain (domain)
e1cd65fd 2152 "Convert mail domain DOMAIN to the country it corresponds to."
154b3e39
RS
2153 (interactive
2154 (let ((completion-ignore-case t))
2155 (list (completing-read "Domain: "
2156 mail-extr-all-top-level-domains nil t))))
2157 (or (setq domain (intern-soft (downcase domain)
2158 mail-extr-all-top-level-domains))
29565a87 2159 (error "No such domain"))
154b3e39 2160 (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name)))
72c0ae01
ER
2161
2162\f
154b3e39
RS
2163;(let ((all nil))
2164; (mapatoms #'(lambda (x)
6c83d99f 2165; (if (and (boundp x)
154b3e39
RS
2166; (string-match "^mail-extr-" (symbol-name x)))
2167; (setq all (cons x all)))))
2168; (setq all (sort all #'string-lessp))
2169; (cons 'setq
2170; (apply 'nconc (mapcar #'(lambda (x)
2171; (list x (symbol-value x)))
2172; all))))
72c0ae01
ER
2173
2174\f
154b3e39 2175(provide 'mail-extr)
72c0ae01
ER
2176
2177;;; mail-extr.el ends here