(ispell-local-dictionary-alist): Add
[bpt/emacs.git] / lisp / mail / mail-extr.el
CommitLineData
b1ccc1ce 1;;; mail-extr.el --- extract full name and address from RFC 822 mail header -*- coding: utf-8 -*-
72c0ae01 2
0b725d8c
GM
3;; Copyright (C) 1991, 1992, 1993, 1994, 1997, 2001
4;; Free Software Foundation, Inc.
3a801d0c 5
72c0ae01 6;; Author: Joe Wells <jbw@cs.bu.edu>
d1782bd8 7;; Maintainer: FSF
72c0ae01
ER
8;; Keywords: mail
9
72c0ae01
ER
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
154b3e39 14;; the Free Software Foundation; either version 2, or (at your option)
72c0ae01
ER
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
b578f267
EN
23;; along with GNU Emacs; see the file COPYING. If not, write to the
24;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25;; Boston, MA 02111-1307, USA.
72c0ae01
ER
26
27;;; Commentary:
28
154b3e39
RS
29;; The entry point of this code is
30;;
d1782bd8 31;; mail-extract-address-components: (address &optional all)
6c83d99f 32;;
154b3e39
RS
33;; Given an RFC-822 ADDRESS, extract full name and canonical address.
34;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
35;; If no name can be extracted, FULL-NAME will be nil.
6c83d99f 36;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
154b3e39
RS
37;; (narrowed) portion of the buffer will be interpreted as the address.
38;; (This feature exists so that the clever caller might be able to avoid
39;; consing a string.)
40;; If ADDRESS contains more than one RFC-822 address, only the first is
41;; returned.
42;;
d1782bd8
KH
43;; If ALL is non-nil, that means return info about all the addresses
44;; that are found in ADDRESS. The value is a list of elements of
45;; the form (FULL-NAME CANONICAL-ADDRESS), one per address.
46;;
154b3e39
RS
47;; This code is more correct (and more heuristic) parser than the code in
48;; rfc822.el. And despite its size, it's fairly fast.
49;;
72c0ae01 50;; There are two main benefits:
154b3e39 51;;
72c0ae01 52;; 1. Higher probability of getting the correct full name for a human than
154b3e39 53;; any other package we know of. (On the other hand, it will cheerfully
72c0ae01
ER
54;; mangle non-human names/comments.)
55;; 2. Address part is put in a canonical form.
154b3e39
RS
56;;
57;; The interface is not yet carved in stone; please give us suggestions.
58;;
59;; We have an extensive test-case collection of funny addresses if you want to
72c0ae01 60;; work with the code. Developing this code requires frequent testing to
154b3e39
RS
61;; make sure you're not breaking functionality. The test cases aren't included
62;; because they are over 100K.
63;;
6c83d99f 64;; If you find an address that mail-extr fails on, please send it to the
154b3e39
RS
65;; maintainer along with what you think the correct results should be. We do
66;; not consider it a bug if mail-extr mangles a comment that does not
6c83d99f 67;; correspond to a real human full name, although we would prefer that
154b3e39
RS
68;; mail-extr would return the comment as-is.
69;;
72c0ae01 70;; Features:
154b3e39 71;;
72c0ae01 72;; * Full name handling:
154b3e39 73;;
72c0ae01
ER
74;; * knows where full names can be found in an address.
75;; * avoids using empty comments and quoted text.
76;; * extracts full names from mailbox names.
77;; * recognizes common formats for comments after a full name.
78;; * puts a period and a space after each initial.
154b3e39
RS
79;; * understands & referring to the mailbox name, capitalized.
80;; * strips name prefixes like "Prof.", etc.
72c0ae01
ER
81;; * understands what characters can occur in names (not just letters).
82;; * figures out middle initial from mailbox name.
83;; * removes funny nicknames.
84;; * keeps suffixes such as Jr., Sr., III, etc.
85;; * reorders "Last, First" type names.
154b3e39 86;;
72c0ae01 87;; * Address handling:
154b3e39 88;;
72c0ae01
ER
89;; * parses rfc822 quoted text, comments, and domain literals.
90;; * parses rfc822 multi-line headers.
91;; * does something reasonable with rfc822 GROUP addresses.
92;; * handles many rfc822 noncompliant and garbage addresses.
93;; * canonicalizes addresses (after stripping comments/phrases outside <>).
94;; * converts ! addresses into .UUCP and %-style addresses.
95;; * converts rfc822 ROUTE addresses to %-style addresses.
96;; * truncates %-style addresses at leftmost fully qualified domain name.
97;; * handles local relative precedence of ! vs. % and @ (untested).
154b3e39 98;;
72c0ae01
ER
99;; It does almost no string creation. It primarily uses the built-in
100;; parsing routines with the appropriate syntax tables. This should
101;; result in greater speed.
154b3e39 102;;
72c0ae01 103;; TODO:
154b3e39 104;;
72c0ae01
ER
105;; * handle all test cases. (This will take forever.)
106;; * software to pick the correct header to use (eg., "Senders-Name:").
107;; * multiple addresses in the "From:" header (almost all of the necessary
108;; code is there).
109;; * flag to not treat `,' as an address separator. (This is useful when
110;; there is a "From:" header but no "Sender:" header, because then there
111;; is only allowed to be one address.)
112;; * mailbox name does not necessarily contain full name.
113;; * fixing capitalization when it's all upper or lowercase. (Hard!)
114;; * some of the domain literal handling is missing. (But I've never even
115;; seen one of these in a mail address, so maybe no big deal.)
116;; * arrange to have syntax tables byte-compiled.
117;; * speed hacks.
118;; * delete unused variables.
119;; * arrange for testing with different relative precedences of ! vs. @
120;; and %.
72c0ae01
ER
121;; * insert documentation strings!
122;; * handle X.400-gatewayed addresses according to RFC 1148.
123
6c83d99f
JB
124;;; Change Log:
125;;
154b3e39
RS
126;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com)
127;;
128;; * merged with jbw's latest version
129;;
130;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com)
131;;
132;; * high-bit chars in comments weren't treated as word syntax
133;;
134;; Sat Feb 5 03:13:40 1994 Jamie Zawinski (jwz@lucid.com)
135;;
136;; * call replace-match with fixed-case arg
137;;
138;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com)
139;;
140;; * some more cleanup, doc, added provide
141;;
142;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu)
6c83d99f 143;;
154b3e39 144;; * Made mail-full-name-prefixes a user-customizable variable.
79814626 145;; Allow passing the address as a buffer as well as a string.
154b3e39 146;; Allow [ and ] as name characters (Finnish character set).
6c83d99f 147;;
154b3e39 148;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 149;;
154b3e39
RS
150;; * Handle "null" addresses. Handle = used for spacing in mailbox
151;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are
152;; missing their brackets. Handle uppercase "JR". Extract full
153;; names from X.400 addresses encoded in RFC-822. Fix bug in
154;; handling of multiple addresses where first has trailing comment.
155;; Handle more kinds of telephone extension lead-ins.
6c83d99f 156;;
154b3e39 157;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 158;;
154b3e39 159;; * Handle HZ encoding for embedding GB encoded chinese characters.
6c83d99f 160;;
154b3e39 161;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 162;;
154b3e39
RS
163;; * Fixed too broad matching of ham radio call signs. Fixed bug in
164;; handling an unmatched ' in a name string. Enhanced recognition
165;; of when . in the mailbox name terminates the name portion.
166;; Narrowed conversion of . to space to only the necessary
167;; situation. Deal with VMS's stupid date stamps. Handle a unique
168;; way of introducing an alternate address. Fixed spacing bug I
169;; introduced in switching last name order. Fixed bug in handling
170;; address with ! and % but no @. Narrowed the cases in which
171;; certain trailing words are discarded.
6c83d99f 172;;
154b3e39 173;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 174;;
154b3e39
RS
175;; * Fixed bugs in handling GROUP addresses. Certain words in the
176;; middle of a name no longer terminate it. Handle LISTSERV list
177;; names. Ignore comment field containing mailbox name.
6c83d99f 178;;
154b3e39 179;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 180;;
154b3e39
RS
181;; * Moved variant-method code back into main function. Handle
182;; underscores as spaces in comments. Handle leading nickname. Add
183;; flag to ignore single-word names. Other changes.
6c83d99f 184;;
154b3e39 185;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 186;;
154b3e39
RS
187;; * Added in changes by Rod Whitby and Jamie Zawinski. This
188;; includes the flag mail-extr-guess-middle-initial and the fix for
6b07c06e
RS
189;; handling multiple addresses correctly. (Whitby just changed
190;; a > to a <.)
6c83d99f 191;;
72c0ae01 192;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 193;;
72c0ae01 194;; * Cleaned up some more. Release version 1.0 to world.
6c83d99f 195;;
72c0ae01 196;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 197;;
72c0ae01 198;; * Cleaned up full name extraction extensively.
6c83d99f 199;;
72c0ae01 200;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu)
6c83d99f 201;;
72c0ae01
ER
202;; * Total rewrite. Integrated mail-canonicalize-address into
203;; mail-extract-address-components. Now handles GROUP addresses more
204;; or less correctly. Better handling of lots of different cases.
6c83d99f 205;;
72c0ae01
ER
206;; Fri Jun 14 19:39:50 1991
207;; * Created.
208
209;;; Code:
210\f
72c0ae01 211
0b5bb3ec
SE
212(defgroup mail-extr nil
213 "Extract full name and address from RFC 822 mail header."
214 :prefix "mail-extr-"
215 :group 'mail)
216
154b3e39
RS
217;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
218;;
219;; User configuration variable definitions.
220;;
221
0b5bb3ec 222(defcustom mail-extr-guess-middle-initial nil
154b3e39
RS
223 "*Whether to try to guess middle initial from mail address.
224If true, then when we see an address like \"John Smith <jqs@host.com>\"
0b5bb3ec
SE
225we will assume that \"John Q. Smith\" is the fellow's name."
226 :type 'boolean
227 :group 'mail-extr)
154b3e39 228
2a487c6c 229(defcustom mail-extr-ignore-single-names nil
154b3e39
RS
230 "*Whether to ignore a name that is just a single word.
231If true, then when we see an address like \"Idiot <dumb@stupid.com>\"
0b5bb3ec
SE
232we will act as though we couldn't find a full name in the address."
233 :type 'boolean
ff9ed544 234 :version "21.4"
0b5bb3ec 235 :group 'mail-extr)
154b3e39
RS
236
237;; Matches a leading title that is not part of the name (does not
238;; contribute to uniquely identifying the person).
0b5bb3ec 239(defcustom mail-extr-full-name-prefixes
154b3e39
RS
240 (purecopy
241 "\\(Prof\\|D[Rr]\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.?[ \t\n]")
242 "*Matches prefixes to the full name that identify a person's position.
243These are stripped from the full name because they do not contribute to
0b5bb3ec
SE
244uniquely identifying the person."
245 :type 'regexp
246 :group 'mail-extr)
154b3e39 247
0b5bb3ec
SE
248(defcustom mail-extr-@-binds-tighter-than-! nil
249 "*Whether the local mail transport agent looks at ! before @."
250 :type 'boolean
251 :group 'mail-extr)
154b3e39 252
0b5bb3ec 253(defcustom mail-extr-mangle-uucp nil
154b3e39 254 "*Whether to throw away information in UUCP addresses
0b5bb3ec
SE
255by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\"."
256 :type 'boolean
257 :group 'mail-extr)
72c0ae01
ER
258
259;;----------------------------------------------------------------------
260;; what orderings are meaningful?????
261;;(defvar mail-operator-precedence-list '(?! ?% ?@))
262;; Right operand of a % or a @ must be a domain name, period. No other
263;; operators allowed. Left operand of a @ is an address relative to that
264;; site.
265
266;; Left operand of a ! must be a domain name. Right operand is an
267;; arbitrary address.
268;;----------------------------------------------------------------------
269
154b3e39 270\f
72c0ae01 271
154b3e39
RS
272;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
273;;
274;; Constant definitions.
275;;
276
72c0ae01 277;; Any character that can occur in a name, not counting characters that
154b3e39
RS
278;; separate parts of a multipart name (hyphen and period).
279;; Yes, there are weird people with digits in their names.
280;; You will also notice the consideration for the
281;; Swedish/Finnish/Norwegian character set.
154b3e39 282(defconst mail-extr-all-letters-but-separators
6e66e4c6 283 (purecopy "][[:alnum:]{|}'~`"))
154b3e39
RS
284
285;; Any character that can occur in a name in an RFC822 address including
286;; the separator (hyphen and possibly period) for multipart names.
287;; #### should . be in here?
288(defconst mail-extr-all-letters
289 (purecopy (concat mail-extr-all-letters-but-separators "---")))
290
291;; Any character that can start a name.
292;; Keep this set as minimal as possible.
6e66e4c6 293(defconst mail-extr-first-letters (purecopy "[:alpha:]"))
72c0ae01
ER
294
295;; Any character that can end a name.
154b3e39 296;; Keep this set as minimal as possible.
6e66e4c6 297(defconst mail-extr-last-letters (purecopy "[:alpha:]`'."))
72c0ae01 298
af604656 299(defconst mail-extr-leading-garbage "\\W+")
72c0ae01 300
6c83d99f 301;; (defconst mail-extr-non-name-chars
154b3e39
RS
302;; (purecopy (concat "^" mail-extr-all-letters ".")))
303;; (defconst mail-extr-non-begin-name-chars
304;; (purecopy (concat "^" mail-extr-first-letters)))
305;; (defconst mail-extr-non-end-name-chars
306;; (purecopy (concat "^" mail-extr-last-letters)))
72c0ae01 307
6c83d99f 308;; Matches an initial not followed by both a period and a space.
154b3e39 309;; (defconst mail-extr-bad-initials-pattern
6c83d99f 310;; (purecopy
154b3e39
RS
311;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
312;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters)))
72c0ae01
ER
313
314;; Matches periods used instead of spaces. Must not match the period
315;; following an initial.
154b3e39
RS
316(defconst mail-extr-bad-dot-pattern
317 (purecopy
318 (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
319 mail-extr-all-letters
320 mail-extr-last-letters
321 mail-extr-first-letters)))
72c0ae01
ER
322
323;; Matches an embedded or leading nickname that should be removed.
154b3e39
RS
324;; (defconst mail-extr-nickname-pattern
325;; (purecopy
326;; (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
327;; mail-extr-all-letters)))
72c0ae01
ER
328
329;; Matches the occurrence of a generational name suffix, and the last
154b3e39
RS
330;; character of the preceding name. This is important because we want to
331;; keep such suffixes: they help to uniquely identify the person.
332;; *** Perhaps this should be a user-customizable variable. However, the
333;; *** regular expression is fairly tricky to alter, so maybe not.
334(defconst mail-extr-full-name-suffix-pattern
335 (purecopy
336 (format
337 "\\(,? ?\\([JjSs][Rr]\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
338 mail-extr-all-letters mail-extr-all-letters)))
339
340(defconst mail-extr-roman-numeral-pattern (purecopy "V?I+V?\\b"))
72c0ae01
ER
341
342;; Matches a trailing uppercase (with other characters possible) acronym.
343;; Must not match a trailing uppercase last name or trailing initial
154b3e39
RS
344(defconst mail-extr-weird-acronym-pattern
345 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)"))
6c83d99f 346
72c0ae01 347;; Matches a mixed-case or lowercase name (not an initial).
154b3e39
RS
348;; #### Match Latin1 lower case letters here too?
349;; (defconst mail-extr-mixed-case-name-pattern
350;; (purecopy
351;; (format
352;; "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
353;; mail-extr-all-letters mail-extr-last-letters
354;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters
355;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters)))
72c0ae01
ER
356
357;; Matches a trailing alternative address.
154b3e39 358;; #### Match Latin1 letters here too?
6c83d99f 359;; #### Match _ before @ here too?
154b3e39
RS
360(defconst mail-extr-alternative-address-pattern
361 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]"))
72c0ae01
ER
362
363;; Matches a variety of trailing comments not including comma-delimited
364;; comments.
154b3e39
RS
365(defconst mail-extr-trailing-comment-start-pattern
366 (purecopy " [-{]\\|--\\|[+@#></\;]"))
72c0ae01
ER
367
368;; Matches a name (not an initial).
369;; This doesn't force a word boundary at the end because sometimes a
370;; comment is separated by a `-' with no preceding space.
154b3e39
RS
371(defconst mail-extr-name-pattern
372 (purecopy (format "\\b[%s][%s]*[%s]"
373 mail-extr-first-letters
374 mail-extr-all-letters
375 mail-extr-last-letters)))
72c0ae01 376
154b3e39
RS
377(defconst mail-extr-initial-pattern
378 (purecopy (format "\\b[%s]\\([. ]\\|\\b\\)" mail-extr-first-letters)))
72c0ae01
ER
379
380;; Matches a single name before a comma.
154b3e39
RS
381;; (defconst mail-extr-last-name-first-pattern
382;; (purecopy (concat "\\`" mail-extr-name-pattern ",")))
72c0ae01
ER
383
384;; Matches telephone extensions.
154b3e39
RS
385(defconst mail-extr-telephone-extension-pattern
386 (purecopy
387 "\\(\\([Ee]xt\\|\\|[Tt]ph\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+"))
72c0ae01
ER
388
389;; Matches ham radio call signs.
154b3e39
RS
390;; Help from: Mat Maessen N2NJZ <maessm@rpi.edu>, Mark Feit
391;; <mark@era.com>, Michael Covington <mcovingt@ai.uga.edu>.
392;; Examples: DX504 DX515 K5MRU K8DHK KA9WGN KA9WGN KD3FU KD6EUI KD6HBW
393;; KE9TV KF0NV N1API N3FU N3GZE N3IGS N4KCC N7IKQ N9HHU W4YHF W6ANK WA2SUH
394;; WB7VZI N2NJZ NR3G KJ4KK AB4UM AL7NI KH6OH WN3KBT N4TMI W1A N0NZO
395(defconst mail-extr-ham-call-sign-pattern
396 (purecopy "\\b\\(DX[0-9]+\\|[AKNW][A-Z]?[0-9][A-Z][A-Z]?[A-Z]?\\)"))
397
398;; Possible trailing suffixes: "\\(/\\(KT\\|A[AEG]\\|[R0-9]\\)\\)?"
399;; /KT == Temporary Technician (has CSC but not "real" license)
400;; /AA == Temporary Advanced
401;; /AE == Temporary Extra
402;; /AG == Temporary General
403;; /R == repeater
404;; /# == stations operating out of home district
405;; I don't include these in the regexp above because I can't imagine
406;; anyone putting them with their name in an e-mail address.
72c0ae01
ER
407
408;; Matches normal single-part name
154b3e39
RS
409(defconst mail-extr-normal-name-pattern
410 (purecopy (format "\\b[%s][%s]+[%s]"
411 mail-extr-first-letters
412 mail-extr-all-letters-but-separators
413 mail-extr-last-letters)))
414
415;; Matches a single word name.
416;; (defconst mail-extr-one-name-pattern
417;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'")))
6c83d99f 418
72c0ae01 419;; Matches normal two names with missing middle initial
154b3e39
RS
420;; The first name is not allowed to have a hyphen because this can cause
421;; false matches where the "middle initial" is actually the first letter
422;; of the second part of the first name.
423(defconst mail-extr-two-name-pattern
424 (purecopy
425 (concat "\\`\\(" mail-extr-normal-name-pattern
426 "\\|" mail-extr-initial-pattern
427 "\\) +\\(" mail-extr-name-pattern "\\)\\(,\\|\\'\\)")))
428
429(defconst mail-extr-listserv-list-name-pattern
430 (purecopy "Multiple recipients of list \\([-A-Z]+\\)"))
431
432(defconst mail-extr-stupid-vms-date-stamp-pattern
433 (purecopy
434 "[0-9][0-9]-[JFMASOND][aepuco][nbrylgptvc]-[0-9][0-9][0-9][0-9] [0-9]+ *"))
435
436;;; HZ -- GB (PRC Chinese character encoding) in ASCII embedding protocol
437;;
438;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is
439;; encountered. The character '~' is an escape character. By convention, it
440;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the
441;; following special meaning.
6c83d99f 442;;
154b3e39
RS
443;; o The escape sequence '~~' is interpreted as a '~'.
444;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
445;; o The escape sequence '~\n' is a line-continuation marker to be consumed
446;; with no output produced.
6c83d99f 447;;
154b3e39
RS
448;; In GB mode, characters are interpreted two bytes at a time as (pure) GB
449;; codes until the escape-from-GB code '~}' is read. This code switches the
450;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}'
451;; ($7E7D) is outside the defined GB range.)
452(defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern
453 (purecopy "~{\\([^~].\\|~[^\}]\\)+~}"))
454
455;; The leading optional lowercase letters are for a bastardized version of
456;; the encoding, as is the optional nature of the final slash.
457(defconst mail-extr-x400-encoded-address-pattern
458 (purecopy "[a-z]?[a-z]?\\(/[A-Za-z]+\\(\\.[A-Za-z]+\\)?=[^/]+\\)+/?\\'"))
459
460(defconst mail-extr-x400-encoded-address-field-pattern-format
461 (purecopy "/%s=\\([^/]+\\)\\(/\\|\\'\\)"))
462
463(defconst mail-extr-x400-encoded-address-surname-pattern
464 ;; S stands for Surname (family name).
465 (purecopy
466 (format mail-extr-x400-encoded-address-field-pattern-format "[Ss]")))
467
468(defconst mail-extr-x400-encoded-address-given-name-pattern
469 ;; G stands for Given name.
470 (purecopy
471 (format mail-extr-x400-encoded-address-field-pattern-format "[Gg]")))
472
473(defconst mail-extr-x400-encoded-address-full-name-pattern
474 ;; PN stands for Personal Name. When used it represents the combination
475 ;; of the G and S fields.
476 ;; "The one system I used having this field asked it with the prompt
477 ;; `Personal Name'. But they mapped it into G and S on outgoing real
478 ;; X.400 addresses. As they mapped G and S into PN on incoming..."
479 (purecopy
480 (format mail-extr-x400-encoded-address-field-pattern-format "[Pp][Nn]")))
481
482\f
483
484;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
485;;
486;; Syntax tables used for quick parsing.
487;;
488
489(defconst mail-extr-address-syntax-table (make-syntax-table))
490(defconst mail-extr-address-comment-syntax-table (make-syntax-table))
491(defconst mail-extr-address-domain-literal-syntax-table (make-syntax-table))
492(defconst mail-extr-address-text-comment-syntax-table (make-syntax-table))
493(defconst mail-extr-address-text-syntax-table (make-syntax-table))
d980c402
SM
494(mapc
495 (lambda (pair)
496 (let ((syntax-table (symbol-value (car pair))))
497 (dolist (item (cdr pair))
498 (if (eq 2 (length item))
499 ;; modifying syntax of a single character
500 (modify-syntax-entry (car item) (car (cdr item)) syntax-table)
501 ;; modifying syntax of a range of characters
502 (let ((char (nth 0 item))
503 (bound (nth 1 item))
504 (syntax (nth 2 item)))
505 (while (<= char bound)
506 (modify-syntax-entry char syntax syntax-table)
507 (setq char (1+ char))))))))
154b3e39
RS
508 '((mail-extr-address-syntax-table
509 (?\000 ?\037 "w") ;control characters
510 (?\040 " ") ;SPC
511 (?! ?~ "w") ;printable characters
512 (?\177 "w") ;DEL
72c0ae01
ER
513 (?\t " ")
514 (?\r " ")
515 (?\n " ")
516 (?\( ".")
517 (?\) ".")
518 (?< ".")
519 (?> ".")
520 (?@ ".")
521 (?, ".")
522 (?\; ".")
523 (?: ".")
524 (?\\ "\\")
525 (?\" "\"")
526 (?. ".")
527 (?\[ ".")
528 (?\] ".")
529 ;; % and ! aren't RFC822 characters, but it is convenient to pretend
530 (?% ".")
154b3e39 531 (?! ".") ;; this needs to be word-constituent when not in .UUCP mode
72c0ae01 532 )
154b3e39
RS
533 (mail-extr-address-comment-syntax-table
534 (?\000 ?\377 "w")
535 (?\040 " ")
536 (?\240 " ")
537 (?\t " ")
538 (?\r " ")
539 (?\n " ")
72c0ae01
ER
540 (?\( "\(\)")
541 (?\) "\)\(")
542 (?\\ "\\"))
154b3e39
RS
543 (mail-extr-address-domain-literal-syntax-table
544 (?\000 ?\377 "w")
545 (?\040 " ")
546 (?\240 " ")
547 (?\t " ")
548 (?\r " ")
549 (?\n " ")
72c0ae01
ER
550 (?\[ "\(\]") ;??????
551 (?\] "\)\[") ;??????
552 (?\\ "\\"))
154b3e39
RS
553 (mail-extr-address-text-comment-syntax-table
554 (?\000 ?\377 "w")
555 (?\040 " ")
556 (?\240 " ")
557 (?\t " ")
558 (?\r " ")
559 (?\n " ")
72c0ae01
ER
560 (?\( "\(\)")
561 (?\) "\)\(")
562 (?\[ "\(\]")
563 (?\] "\)\[")
564 (?\{ "\(\}")
565 (?\} "\)\{")
566 (?\\ "\\")
567 (?\" "\"")
568 ;; (?\' "\)\`")
569 ;; (?\` "\(\'")
570 )
154b3e39
RS
571 (mail-extr-address-text-syntax-table
572 (?\000 ?\177 ".")
573 (?\200 ?\377 "w")
574 (?\040 " ")
575 (?\t " ")
576 (?\r " ")
577 (?\n " ")
72c0ae01
ER
578 (?A ?Z "w")
579 (?a ?z "w")
580 (?- "w")
581 (?\} "w")
582 (?\{ "w")
583 (?| "w")
584 (?\' "w")
585 (?~ "w")
586 (?0 ?9 "w"))
587 ))
588
589\f
154b3e39
RS
590;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
591;;
72c0ae01 592;; Utility functions and macros.
154b3e39
RS
593;;
594
ff9ed544
DL
595;; Fixme: There are Latin-1 nbsp below. If such characters should be
596;; included, this is the wrong thing to do -- it should use syntax (or
597;; regexp char classes).
598
7a9ebd0b 599(defsubst mail-extr-skip-whitespace-forward ()
154b3e39 600 ;; v19 fn skip-syntax-forward is more tasteful, but not byte-coded.
b1ccc1ce 601 (skip-chars-forward " \t\n\r "))
154b3e39 602
7a9ebd0b 603(defsubst mail-extr-skip-whitespace-backward ()
154b3e39 604 ;; v19 fn skip-syntax-backward is more tasteful, but not byte-coded.
b1ccc1ce 605 (skip-chars-backward " \t\n\r "))
7a9ebd0b 606
72c0ae01 607
7a9ebd0b
SM
608(defsubst mail-extr-undo-backslash-quoting (beg end)
609 (save-excursion
610 (save-restriction
611 (narrow-to-region beg end)
612 (goto-char (point-min))
613 ;; undo \ quoting
614 (while (search-forward "\\" nil t)
d980c402 615 (delete-char -1)
7a9ebd0b
SM
616 (or (eobp)
617 (forward-char 1))))))
618
619(defsubst mail-extr-nuke-char-at (pos)
620 (save-excursion
621 (goto-char pos)
d980c402 622 (delete-char 1)
7a9ebd0b 623 (insert ?\ )))
154b3e39
RS
624
625(put 'mail-extr-nuke-outside-range
626 'edebug-form-spec '(symbolp &optional form form atom))
627
628(defmacro mail-extr-nuke-outside-range (list-symbol
629 beg-symbol end-symbol
630 &optional no-replace)
d980c402
SM
631 "Delete all elements outside BEG..END in LIST.
632LIST-SYMBOL names a variable holding a list of buffer positions
633BEG-SYMBOL and END-SYMBOL name variables delimiting a range
634Each element of LIST-SYMBOL which lies outside of the range is
635 deleted from the list.
636Unless NO-REPLACE is true, at each of the positions in LIST-SYMBOL
637 which lie outside of the range, one character at that position is
638 replaced with a SPC."
154b3e39 639 (or (memq no-replace '(t nil))
29565a87 640 (error "no-replace must be t or nil, evaluable at macroexpand-time"))
d980c402 641 `(let ((temp ,list-symbol)
154b3e39 642 ch)
72c0ae01 643 (while temp
154b3e39 644 (setq ch (car temp))
d980c402
SM
645 (when (or (> ch ,end-symbol)
646 (< ch ,beg-symbol))
647 ,@(if no-replace
648 nil
649 `((mail-extr-nuke-char-at ch)))
650 (setcar temp nil))
72c0ae01 651 (setq temp (cdr temp)))
d980c402 652 (setq ,list-symbol (delq nil ,list-symbol))))
72c0ae01 653
154b3e39
RS
654(defun mail-extr-demarkerize (marker)
655 ;; if arg is a marker, destroys the marker, then returns the old value.
656 ;; otherwise returns the arg.
657 (if (markerp marker)
658 (let ((temp (marker-position marker)))
659 (set-marker marker nil)
660 temp)
661 marker))
662
663(defun mail-extr-markerize (pos)
664 ;; coerces pos to a marker if non-nil.
665 (if (or (markerp pos) (null pos))
666 pos
667 (copy-marker pos)))
668
7a9ebd0b 669(defsubst mail-extr-safe-move-sexp (arg)
154b3e39 670 ;; Safely skip over one balanced sexp, if there is one. Return t if success.
7a9ebd0b
SM
671 (condition-case error
672 (progn
673 (goto-char (or (scan-sexps (point) arg) (point)))
674 t)
675 (error
676 ;; #### kludge kludge kludge kludge kludge kludge kludge !!!
677 (if (string-equal (nth 1 error) "Unbalanced parentheses")
678 nil
679 (while t
680 (signal (car error) (cdr error)))))))
72c0ae01 681\f
154b3e39
RS
682;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
683;;
72c0ae01 684;; The main function to grind addresses
154b3e39
RS
685;;
686
687(defvar disable-initial-guessing-flag) ; dynamic assignment
688(defvar cbeg) ; dynamic assignment
689(defvar cend) ; dynamic assignment
72c0ae01 690
154b3e39 691;;;###autoload
d1782bd8
KH
692(defun mail-extract-address-components (address &optional all)
693 "Given an RFC-822 address ADDRESS, extract full name and canonical address.
154b3e39 694Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
2a487c6c
SJ
695If no name can be extracted, FULL-NAME will be nil. Also see
696`mail-extr-ignore-single-names'.
d1782bd8
KH
697
698If the optional argument ALL is non-nil, then ADDRESS can contain zero
699or more recipients, separated by commas, and we return a list of
700the form ((FULL-NAME CANONICAL-ADDRESS) ...) with one element for
701each recipient. If ALL is nil, then if ADDRESS contains more than
702one recipients, all but the first is ignored.
703
7a9ebd0b 704ADDRESS may be a string or a buffer. If it is a buffer, the visible
2a487c6c
SJ
705\(narrowed) portion of the buffer will be interpreted as the address.
706\(This feature exists so that the clever caller might be able to avoid
707consing a string.)"
154b3e39
RS
708 (let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
709 (extraction-buffer (get-buffer-create " *extract address components*"))
d1782bd8
KH
710 value-list)
711
7a9ebd0b 712 (with-current-buffer (get-buffer-create extraction-buffer)
154b3e39 713 (fundamental-mode)
e8a57935 714 (buffer-disable-undo extraction-buffer)
154b3e39 715 (set-syntax-table mail-extr-address-syntax-table)
72c0ae01
ER
716 (widen)
717 (erase-buffer)
718 (setq case-fold-search nil)
6c83d99f 719
72c0ae01
ER
720 ;; Insert extra space at beginning to allow later replacement with <
721 ;; without having to move markers.
154b3e39
RS
722 (insert ?\ )
723
724 ;; Insert the address itself.
725 (cond ((stringp address)
726 (insert address))
727 ((bufferp address)
728 (insert-buffer-substring address))
729 (t
29565a87 730 (error "Invalid address: %s" address)))
03007ccc
RS
731
732 (set-text-properties (point-min) (point-max) nil)
d1782bd8 733
7a9ebd0b 734 (with-current-buffer (get-buffer-create canonicalization-buffer)
d1782bd8
KH
735 (fundamental-mode)
736 (buffer-disable-undo canonicalization-buffer)
d1782bd8
KH
737 (setq case-fold-search nil))
738
6c83d99f 739
72c0ae01
ER
740 ;; Unfold multiple lines.
741 (goto-char (point-min))
742 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
743 (replace-match "\\1 " t))
6c83d99f 744
d1782bd8
KH
745 ;; Loop over addresses until we have as many as we want.
746 (while (and (or all (null value-list))
747 (progn (goto-char (point-min))
748 (skip-chars-forward " \t")
749 (not (eobp))))
750 (let (char
751 end-of-address
f98b752f 752 <-pos >-pos @-pos colon-pos comma-pos !-pos %-pos \;-pos
d1782bd8
KH
753 group-:-pos group-\;-pos route-addr-:-pos
754 record-pos-symbol
755 first-real-pos last-real-pos
756 phrase-beg phrase-end
757 cbeg cend ; dynamically set from -voodoo
758 quote-beg quote-end
759 atom-beg atom-end
760 mbox-beg mbox-end
761 \.-ends-name
762 temp
763 ;; name-suffix
764 fi mi li ; first, middle, last initial
765 saved-%-pos saved-!-pos saved-@-pos
766 domain-pos \.-pos insert-point
767 ;; mailbox-name-processed-flag
768 disable-initial-guessing-flag) ; dynamically set from -voodoo
769
7a9ebd0b 770 (set-syntax-table mail-extr-address-syntax-table)
d1782bd8
KH
771 (goto-char (point-min))
772
773 ;; Insert extra space at beginning to allow later replacement with <
774 ;; without having to move markers.
775 (or (eq (following-char) ?\ )
776 (insert ?\ ))
777
778 ;; First pass grabs useful information about address.
779 (while (progn
780 (mail-extr-skip-whitespace-forward)
781 (not (eobp)))
782 (setq char (char-after (point)))
783 (or first-real-pos
784 (if (not (eq char ?\())
785 (setq first-real-pos (point))))
786 (cond
787 ;; comment
788 ((eq char ?\()
789 (set-syntax-table mail-extr-address-comment-syntax-table)
790 ;; only record the first non-empty comment's position
791 (if (and (not cbeg)
792 (save-excursion
793 (forward-char 1)
794 (mail-extr-skip-whitespace-forward)
795 (not (eq ?\) (char-after (point))))))
796 (setq cbeg (point)))
797 ;; TODO: don't record if unbalanced
798 (or (mail-extr-safe-move-sexp 1)
799 (forward-char 1))
800 (set-syntax-table mail-extr-address-syntax-table)
801 (if (and cbeg
802 (not cend))
803 (setq cend (point))))
804 ;; quoted text
805 ((eq char ?\")
806 ;; only record the first non-empty quote's position
807 (if (and (not quote-beg)
808 (save-excursion
809 (forward-char 1)
810 (mail-extr-skip-whitespace-forward)
811 (not (eq ?\" (char-after (point))))))
812 (setq quote-beg (point)))
813 ;; TODO: don't record if unbalanced
814 (or (mail-extr-safe-move-sexp 1)
815 (forward-char 1))
816 (if (and quote-beg
817 (not quote-end))
818 (setq quote-end (point))))
819 ;; domain literals
820 ((eq char ?\[)
821 (set-syntax-table mail-extr-address-domain-literal-syntax-table)
822 (or (mail-extr-safe-move-sexp 1)
823 (forward-char 1))
824 (set-syntax-table mail-extr-address-syntax-table))
825 ;; commas delimit addresses when outside < > pairs.
826 ((and (eq char ?,)
827 (or (and (null <-pos)
828 ;; Handle ROUTE-ADDR address that is missing its <.
829 (not (eq ?@ (char-after (1+ (point))))))
830 (and >-pos
831 ;; handle weird munged addresses
832 ;; BUG FIX: This test was reversed. Thanks to the
833 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
834 ;; for discovering this!
7a9ebd0b 835 (< (car (last <-pos)) (car >-pos)))))
d1782bd8
KH
836 ;; The argument contains more than one address.
837 ;; Temporarily hide everything after this one.
7a9ebd0b 838 (setq end-of-address (copy-marker (1+ (point)) t))
d1782bd8 839 (narrow-to-region (point-min) (1+ (point)))
7a9ebd0b 840 (delete-char 1)
d1782bd8
KH
841 (setq char ?\() ; HAVE I NO SHAME??
842 )
843 ;; record the position of various interesting chars, determine
844 ;; legality later.
845 ((setq record-pos-symbol
846 (cdr (assq char
847 '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
f98b752f 848 (?: . colon-pos) (?, . comma-pos) (?! . !-pos)
d1782bd8
KH
849 (?% . %-pos) (?\; . \;-pos)))))
850 (set record-pos-symbol
851 (cons (point) (symbol-value record-pos-symbol)))
72c0ae01 852 (forward-char 1))
d1782bd8 853 ((eq char ?.)
72c0ae01 854 (forward-char 1))
d1782bd8
KH
855 ((memq char '(
856 ;; comment terminator illegal
857 ?\)
858 ;; domain literal terminator illegal
859 ?\]
860 ;; \ allowed only within quoted strings,
861 ;; domain literals, and comments
862 ?\\
863 ))
864 (mail-extr-nuke-char-at (point))
72c0ae01 865 (forward-char 1))
d1782bd8
KH
866 (t
867 (forward-word 1)))
868 (or (eq char ?\()
869 ;; At the end of first address of a multiple address header.
870 (and (eq char ?,)
871 (eobp))
872 (setq last-real-pos (point))))
873
874 ;; Use only the leftmost <, if any. Replace all others with spaces.
875 (while (cdr <-pos)
876 (mail-extr-nuke-char-at (car <-pos))
877 (setq <-pos (cdr <-pos)))
878
879 ;; Use only the rightmost >, if any. Replace all others with spaces.
880 (while (cdr >-pos)
881 (mail-extr-nuke-char-at (nth 1 >-pos))
882 (setcdr >-pos (nthcdr 2 >-pos)))
883
884 ;; If multiple @s and a :, but no < and >, insert around buffer.
885 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
886 ;; This commonly happens on the UUCP "From " line. Ugh.
d980c402 887 (when (and (> (length @-pos) 1)
f98b752f 888 (eq 1 (length colon-pos)) ;TODO: check if between last two @s
d1782bd8
KH
889 (not \;-pos)
890 (not <-pos))
d980c402
SM
891 (goto-char (point-min))
892 (delete-char 1)
893 (setq <-pos (list (point)))
894 (insert ?<))
d1782bd8
KH
895
896 ;; If < but no >, insert > in rightmost possible position
d980c402
SM
897 (when (and <-pos (null >-pos))
898 (goto-char (point-max))
899 (setq >-pos (list (point)))
900 (insert ?>))
d1782bd8
KH
901
902 ;; If > but no <, replace > with space.
d980c402
SM
903 (when (and >-pos (null <-pos))
904 (mail-extr-nuke-char-at (car >-pos))
905 (setq >-pos nil))
d1782bd8
KH
906
907 ;; Turn >-pos and <-pos into non-lists
908 (setq >-pos (car >-pos)
909 <-pos (car <-pos))
910
911 ;; Trim other punctuation lists of items outside < > pair to handle
912 ;; stupid MTAs.
d980c402
SM
913 (when <-pos ; don't need to check >-pos also
914 ;; handle bozo software that violates RFC 822 by sticking
915 ;; punctuation marks outside of a < > pair
916 (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
917 ;; RFC 822 says nothing about these two outside < >, but
918 ;; remove those positions from the lists to make things
919 ;; easier.
920 (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
921 (mail-extr-nuke-outside-range %-pos <-pos >-pos t))
d1782bd8
KH
922
923 ;; Check for : that indicates GROUP list and for : part of
924 ;; ROUTE-ADDR spec.
925 ;; Can't possibly be more than two :. Nuke any extra.
f98b752f
KH
926 (while colon-pos
927 (setq temp (car colon-pos)
928 colon-pos (cdr colon-pos))
d1782bd8
KH
929 (cond ((and <-pos >-pos
930 (> temp <-pos)
931 (< temp >-pos))
932 (if (or route-addr-:-pos
933 (< (length @-pos) 2)
934 (> temp (car @-pos))
935 (< temp (nth 1 @-pos)))
936 (mail-extr-nuke-char-at temp)
937 (setq route-addr-:-pos temp)))
938 ((or (not <-pos)
939 (and <-pos
940 (< temp <-pos)))
941 (setq group-:-pos temp))))
942
943 ;; Nuke any ; that is in or to the left of a < > pair or to the left
944 ;; of a GROUP starting :. Also, there may only be one ;.
945 (while \;-pos
946 (setq temp (car \;-pos)
947 \;-pos (cdr \;-pos))
948 (cond ((and <-pos >-pos
949 (> temp <-pos)
950 (< temp >-pos))
951 (mail-extr-nuke-char-at temp))
952 ((and (or (not group-:-pos)
953 (> temp group-:-pos))
954 (not group-\;-pos))
955 (setq group-\;-pos temp))))
956
957 ;; Nuke unmatched GROUP syntax characters.
d980c402
SM
958 (when (and group-:-pos (not group-\;-pos))
959 ;; *** Do I really need to erase it?
960 (mail-extr-nuke-char-at group-:-pos)
961 (setq group-:-pos nil))
962 (when (and group-\;-pos (not group-:-pos))
963 ;; *** Do I really need to erase it?
964 (mail-extr-nuke-char-at group-\;-pos)
965 (setq group-\;-pos nil))
d1782bd8
KH
966
967 ;; Handle junk like ";@host.company.dom" that sendmail adds.
968 ;; **** should I remember comment positions?
d980c402 969 (when group-\;-pos
d1782bd8
KH
970 ;; this is fine for now
971 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
972 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
973 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
974 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t)
975 (and last-real-pos
976 (> last-real-pos (1+ group-\;-pos))
977 (setq last-real-pos (1+ group-\;-pos)))
978 ;; *** This may be wrong:
979 (and cend
980 (> cend group-\;-pos)
981 (setq cend nil
982 cbeg nil))
983 (and quote-end
984 (> quote-end group-\;-pos)
985 (setq quote-end nil
986 quote-beg nil))
987 ;; This was both wrong and unnecessary:
988 ;;(narrow-to-region (point-min) group-\;-pos)
989
990 ;; *** The entire handling of GROUP addresses seems rather lame.
991 ;; *** It deserves a complete rethink, except that these addresses
992 ;; *** are hardly ever seen.
d980c402 993 )
d1782bd8
KH
994
995 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
996 ;; others.
6c83d99f 997 ;; Hell, go ahead and nuke all of the commas.
d1782bd8
KH
998 ;; **** This will cause problems when we start handling commas in
999 ;; the PHRASE part .... no it won't ... yes it will ... ?????
1000 (mail-extr-nuke-outside-range comma-pos 1 1)
1001
1002 ;; can only have multiple @s inside < >. The fact that some MTAs
1003 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
1004 ;; handled above.
1005
1006 ;; Locate PHRASE part of ROUTE-ADDR.
d980c402
SM
1007 (when <-pos
1008 (goto-char <-pos)
1009 (mail-extr-skip-whitespace-backward)
1010 (setq phrase-end (point))
1011 (goto-char (or ;;group-:-pos
1012 (point-min)))
1013 (mail-extr-skip-whitespace-forward)
1014 (if (< (point) phrase-end)
1015 (setq phrase-beg (point))
1016 (setq phrase-end nil)))
d1782bd8
KH
1017
1018 ;; handle ROUTE-ADDRS with real ROUTEs.
1019 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
1020 ;; any % or ! must be semantically meaningless.
1021 ;; TODO: do this processing into canonicalization buffer
d980c402
SM
1022 (when route-addr-:-pos
1023 (setq !-pos nil
1024 %-pos nil
1025 >-pos (copy-marker >-pos)
1026 route-addr-:-pos (copy-marker route-addr-:-pos))
1027 (goto-char >-pos)
1028 (insert-before-markers ?X)
1029 (goto-char (car @-pos))
1030 (while (setq @-pos (cdr @-pos))
1031 (delete-char 1)
1032 (setq %-pos (cons (point-marker) %-pos))
1033 (insert "%")
1034 (goto-char (1- >-pos))
1035 (save-excursion
1036 (insert-buffer-substring extraction-buffer
1037 (car @-pos) route-addr-:-pos)
1038 (delete-region (car @-pos) route-addr-:-pos))
1039 (or (cdr @-pos)
1040 (setq saved-@-pos (list (point)))))
1041 (setq @-pos saved-@-pos)
1042 (goto-char >-pos)
1043 (delete-char -1)
1044 (mail-extr-nuke-char-at route-addr-:-pos)
1045 (mail-extr-demarkerize route-addr-:-pos)
1046 (setq route-addr-:-pos nil
1047 >-pos (mail-extr-demarkerize >-pos)
1048 %-pos (mapcar 'mail-extr-demarkerize %-pos)))
d1782bd8
KH
1049
1050 ;; de-listify @-pos
1051 (setq @-pos (car @-pos))
1052
1053 ;; TODO: remove comments in the middle of an address
1054
d980c402 1055 (with-current-buffer canonicalization-buffer
d1782bd8
KH
1056 (widen)
1057 (erase-buffer)
1058 (insert-buffer-substring extraction-buffer)
1059
1060 (if <-pos
1061 (narrow-to-region (progn
1062 (goto-char (1+ <-pos))
1063 (mail-extr-skip-whitespace-forward)
1064 (point))
1065 >-pos)
1066 (if (and first-real-pos last-real-pos)
1067 (narrow-to-region first-real-pos last-real-pos)
1068 ;; ****** Oh no! What if the address is completely empty!
1069 ;; *** Is this correct?
d980c402 1070 (narrow-to-region (point-max) (point-max))))
d1782bd8
KH
1071
1072 (and @-pos %-pos
1073 (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
1074 (and %-pos !-pos
1075 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
1076 (and @-pos !-pos (not %-pos)
1077 (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
1078
1079 ;; Error condition:?? (and %-pos (not @-pos))
1080
1081 ;; WARNING: THIS CODE IS DUPLICATED BELOW.
d980c402
SM
1082 (when (and %-pos (not @-pos))
1083 (goto-char (car %-pos))
1084 (delete-char 1)
1085 (setq @-pos (point))
1086 (insert "@")
1087 (setq %-pos (cdr %-pos)))
1088
1089 (when (and mail-extr-mangle-uucp !-pos)
1090 ;; **** I don't understand this save-restriction and the
1091 ;; narrow-to-region inside it. Why did I do that?
1092 (save-restriction
1093 (cond ((and @-pos
1094 mail-extr-@-binds-tighter-than-!)
1095 (goto-char @-pos)
1096 (setq %-pos (cons (point) %-pos)
1097 @-pos nil)
1098 (delete-char 1)
1099 (insert "%")
1100 (setq insert-point (point-max)))
1101 (mail-extr-@-binds-tighter-than-!
1102 (setq insert-point (point-max)))
1103 (%-pos
1104 (setq insert-point (car (last %-pos))
1105 saved-%-pos (mapcar 'mail-extr-markerize %-pos)
1106 %-pos nil
1107 @-pos (mail-extr-markerize @-pos)))
1108 (@-pos
1109 (setq insert-point @-pos)
1110 (setq @-pos (mail-extr-markerize @-pos)))
1111 (t
1112 (setq insert-point (point-max))))
1113 (narrow-to-region (point-min) insert-point)
1114 (setq saved-!-pos (car !-pos))
1115 (while !-pos
1116 (goto-char (point-max))
1117 (cond ((and (not @-pos)
1118 (not (cdr !-pos)))
1119 (setq @-pos (point))
1120 (insert-before-markers "@ "))
1121 (t
1122 (setq %-pos (cons (point) %-pos))
1123 (insert-before-markers "% ")))
1124 (backward-char 1)
1125 (insert-buffer-substring
1126 (current-buffer)
1127 (if (nth 1 !-pos)
1128 (1+ (nth 1 !-pos))
1129 (point-min))
1130 (car !-pos))
1131 (delete-char 1)
1132 (or (save-excursion
1133 (mail-extr-safe-move-sexp -1)
1134 (mail-extr-skip-whitespace-backward)
1135 (eq ?. (preceding-char)))
1136 (insert-before-markers
1137 (if (save-excursion
1138 (mail-extr-skip-whitespace-backward)
1139 (eq ?. (preceding-char)))
1140 ""
1141 ".")
1142 "uucp"))
1143 (setq !-pos (cdr !-pos))))
1144 (and saved-%-pos
1145 (setq %-pos (append (mapcar 'mail-extr-demarkerize
1146 saved-%-pos)
1147 %-pos)))
1148 (setq @-pos (mail-extr-demarkerize @-pos))
1149 (narrow-to-region (1+ saved-!-pos) (point-max)))
d1782bd8
KH
1150
1151 ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
d980c402
SM
1152 (when (and %-pos (not @-pos))
1153 (goto-char (car %-pos))
1154 (delete-char 1)
1155 (setq @-pos (point))
1156 (insert "@")
1157 (setq %-pos (cdr %-pos)))
1158
1159 (when (setq %-pos (nreverse %-pos)) ; implies @-pos valid
1160 (setq temp %-pos)
1161 (catch 'truncated
1162 (while temp
1163 (goto-char (or (nth 1 temp)
1164 @-pos))
1165 (mail-extr-skip-whitespace-backward)
1166 (save-excursion
1167 (mail-extr-safe-move-sexp -1)
1168 (setq domain-pos (point))
1169 (mail-extr-skip-whitespace-backward)
1170 (setq \.-pos (eq ?. (preceding-char))))
1171 (when (and \.-pos
1172 ;; #### string consing
1173 (let ((s (intern-soft
1174 (buffer-substring domain-pos (point))
1175 mail-extr-all-top-level-domains)))
1176 (and s (get s 'domain-name))))
1177 (narrow-to-region (point-min) (point))
1178 (goto-char (car temp))
1179 (delete-char 1)
1180 (setq @-pos (point))
1181 (setcdr temp nil)
1182 (setq %-pos (delq @-pos %-pos))
1183 (insert "@")
1184 (throw 'truncated t))
1185 (setq temp (cdr temp)))))
d1782bd8
KH
1186 (setq mbox-beg (point-min)
1187 mbox-end (if %-pos (car %-pos)
1188 (or @-pos
d980c402
SM
1189 (point-max))))
1190
1191 (when @-pos
1192 ;; Make the domain-name part lowercase since it's case
1193 ;; insensitive anyway.
1194 (downcase-region (1+ @-pos) (point-max))))
d1782bd8
KH
1195
1196 ;; Done canonicalizing address.
1197 ;; We are now back in extraction-buffer.
1198
1199 ;; Decide what part of the address to search to find the full name.
1200 (cond (
1201 ;; Example: "First M. Last" <fml@foo.bar.dom>
1202 (and phrase-beg
1203 (eq quote-beg phrase-beg)
1204 (<= quote-end phrase-end))
1205 (narrow-to-region (1+ quote-beg) (1- quote-end))
1206 (mail-extr-undo-backslash-quoting (point-min) (point-max)))
1207
1208 ;; Example: First Last <fml@foo.bar.dom>
1209 (phrase-beg
1210 (narrow-to-region phrase-beg phrase-end))
1211
1212 ;; Example: fml@foo.bar.dom (First M. Last)
1213 (cbeg
1214 (narrow-to-region (1+ cbeg) (1- cend))
1215 (mail-extr-undo-backslash-quoting (point-min) (point-max))
1216
1217 ;; Deal with spacing problems
1218 (goto-char (point-min))
1219;;; (cond ((not (search-forward " " nil t))
1220;;; (goto-char (point-min))
1221;;; (cond ((search-forward "_" nil t)
1222;;; ;; Handle the *idiotic* use of underlines as spaces.
1223;;; ;; Example: fml@foo.bar.dom (First_M._Last)
1224;;; (goto-char (point-min))
1225;;; (while (search-forward "_" nil t)
1226;;; (replace-match " " t)))
1227;;; ((search-forward "." nil t)
1228;;; ;; Fix . used as space
1229;;; ;; Example: danj1@cb.att.com (daniel.jacobson)
1230;;; (goto-char (point-min))
1231;;; (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1232;;; (replace-match "\\1 \\2" t))))))
154b3e39 1233 )
d1782bd8
KH
1234
1235 ;; Otherwise we try to get the name from the mailbox portion
1236 ;; of the address.
1237 ;; Example: First_M_Last@foo.bar.dom
72c0ae01 1238 (t
d1782bd8
KH
1239 ;; *** Work in canon buffer instead? No, can't. Hmm.
1240 (goto-char (point-max))
1241 (narrow-to-region (point) (point))
1242 (insert-buffer-substring canonicalization-buffer
1243 mbox-beg mbox-end)
1244 (goto-char (point-min))
1245
1246 ;; Example: First_Last.XXX@foo.bar.dom
1247 (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
1248
1249 (goto-char (point-min))
1250
1251 (if (not mail-extr-mangle-uucp)
1252 (modify-syntax-entry ?! "w" (syntax-table)))
1253
1254 (while (progn
1255 (mail-extr-skip-whitespace-forward)
1256 (not (eobp)))
1257 (setq char (char-after (point)))
154b3e39 1258 (cond
d1782bd8
KH
1259 ((eq char ?\")
1260 (setq quote-beg (point))
1261 (or (mail-extr-safe-move-sexp 1)
1262 ;; TODO: handle this error condition!!!!!
1263 (forward-char 1))
1264 ;; take into account deletions
1265 (setq quote-end (- (point) 2))
1266 (save-excursion
1267 (backward-char 1)
d980c402 1268 (delete-char 1)
d1782bd8
KH
1269 (goto-char quote-beg)
1270 (or (eobp)
d980c402 1271 (delete-char 1)))
d1782bd8
KH
1272 (mail-extr-undo-backslash-quoting quote-beg quote-end)
1273 (or (eq ?\ (char-after (point)))
1274 (insert " "))
1275 ;; (setq mailbox-name-processed-flag t)
1276 (setq \.-ends-name t))
1277 ((eq char ?.)
1278 (if (memq (char-after (1+ (point))) '(?_ ?=))
1279 (progn
1280 (forward-char 1)
d980c402 1281 (delete-char 1)
d1782bd8
KH
1282 (insert ?\ ))
1283 (if \.-ends-name
1284 (narrow-to-region (point-min) (point))
d980c402 1285 (delete-char 1)
d1782bd8
KH
1286 (insert " ")))
1287 ;; (setq mailbox-name-processed-flag t)
1288 )
1289 ((memq (char-syntax char) '(?. ?\\))
d980c402 1290 (delete-char 1)
d1782bd8
KH
1291 (insert " ")
1292 ;; (setq mailbox-name-processed-flag t)
154b3e39 1293 )
154b3e39 1294 (t
d1782bd8
KH
1295 (setq atom-beg (point))
1296 (forward-word 1)
1297 (setq atom-end (point))
1298 (goto-char atom-beg)
1299 (save-restriction
1300 (narrow-to-region atom-beg atom-end)
1301 (cond
1302
1303 ;; Handle X.400 addresses encoded in RFC-822.
1304 ;; *** Shit! This has to handle the case where it is
1305 ;; *** embedded in a quote too!
1306 ;; *** Shit! The input is being broken up into atoms
1307 ;; *** by periods!
1308 ((looking-at mail-extr-x400-encoded-address-pattern)
1309
1310 ;; Copy the contents of the individual fields that
1311 ;; might hold name data to the beginning.
d980c402
SM
1312 (mapc
1313 (lambda (field-pattern)
1314 (when
1315 (save-excursion
1316 (re-search-forward field-pattern nil t))
1317 (insert-buffer-substring (current-buffer)
1318 (match-beginning 1)
1319 (match-end 1))
1320 (insert " ")))
d1782bd8
KH
1321 (list mail-extr-x400-encoded-address-given-name-pattern
1322 mail-extr-x400-encoded-address-surname-pattern
1323 mail-extr-x400-encoded-address-full-name-pattern))
1324
1325 ;; Discard the rest, since it contains stuff like
1326 ;; routing information, not part of a name.
1327 (mail-extr-skip-whitespace-backward)
1328 (delete-region (point) (point-max))
1329
1330 ;; Handle periods used for spacing.
1331 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1332 (replace-match "\\1 \\2" t))
1333
1334 ;; (setq mailbox-name-processed-flag t)
1335 )
1336
1337 ;; Handle normal addresses.
1338 (t
1339 (goto-char (point-min))
1340 ;; Handle _ and = used for spacing.
1341 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
1342 (replace-match "\\1 " t)
1343 ;; (setq mailbox-name-processed-flag t)
1344 )
1345 (goto-char (point-max))))))))
1346
1347 ;; undo the dirty deed
1348 (if (not mail-extr-mangle-uucp)
1349 (modify-syntax-entry ?! "." (syntax-table)))
1350 ;;
1351 ;; If we derived the name from the mailbox part of the address,
1352 ;; and we only got one word out of it, don't treat that as a
1353 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
1354 ;; (if (not mailbox-name-processed-flag)
1355 ;; (delete-region (point-min) (point-max)))
1356 ))
1357
1358 (set-syntax-table mail-extr-address-text-syntax-table)
1359
1360 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
1361 (goto-char (point-min))
1362
1363 ;; If name is "First Last" and userid is "F?L", then assume
1364 ;; the middle initial is the second letter in the userid.
1365 ;; Initial code by Jamie Zawinski <jwz@lucid.com>
1366 ;; *** Make it work when there's a suffix as well.
1367 (goto-char (point-min))
d980c402
SM
1368 (when (and mail-extr-guess-middle-initial
1369 (not disable-initial-guessing-flag)
1370 (eq 3 (- mbox-end mbox-beg))
1371 (progn
1372 (goto-char (point-min))
1373 (looking-at mail-extr-two-name-pattern)))
1374 (setq fi (char-after (match-beginning 0))
1375 li (char-after (match-beginning 3)))
1376 (with-current-buffer canonicalization-buffer
1377 ;; char-equal is ignoring case here, so no need to upcase
1378 ;; or downcase.
1379 (let ((case-fold-search t))
1380 (and (char-equal fi (char-after mbox-beg))
1381 (char-equal li (char-after (1- mbox-end)))
1382 (setq mi (char-after (1+ mbox-beg))))))
1383 (when (and mi
1384 ;; TODO: use better table than syntax table
1385 (eq ?w (char-syntax mi)))
1386 (goto-char (match-beginning 3))
1387 (insert (upcase mi) ". ")))
d1782bd8
KH
1388
1389 ;; Nuke name if it is the same as mailbox name.
1390 (let ((buffer-length (- (point-max) (point-min)))
1391 (i 0)
1392 (names-match-flag t))
d980c402
SM
1393 (when (and (> buffer-length 0)
1394 (eq buffer-length (- mbox-end mbox-beg)))
1395 (goto-char (point-max))
1396 (insert-buffer-substring canonicalization-buffer
1397 mbox-beg mbox-end)
1398 (while (and names-match-flag
1399 (< i buffer-length))
1400 (or (eq (downcase (char-after (+ i (point-min))))
1401 (downcase
1402 (char-after (+ i buffer-length (point-min)))))
1403 (setq names-match-flag nil))
1404 (setq i (1+ i)))
1405 (delete-region (+ (point-min) buffer-length) (point-max))
1406 (if names-match-flag
1407 (narrow-to-region (point) (point)))))
d1782bd8
KH
1408
1409 ;; Nuke name if it's just one word.
1410 (goto-char (point-min))
1411 (and mail-extr-ignore-single-names
1412 (not (re-search-forward "[- ]" nil t))
1413 (narrow-to-region (point) (point)))
1414
1415 ;; Record the result
1416 (setq value-list
1417 (cons (list (if (not (= (point-min) (point-max)))
1418 (buffer-string))
d980c402 1419 (with-current-buffer canonicalization-buffer
d1782bd8
KH
1420 (if (not (= (point-min) (point-max)))
1421 (buffer-string))))
1422 value-list))
1423
1424 ;; Unless one address is all we wanted,
1425 ;; delete this one from extraction-buffer
1426 ;; and get ready to extract the next address.
1427 (when all
1428 (if end-of-address
1429 (narrow-to-region 1 end-of-address)
1430 (widen))
1431 (delete-region (point-min) (point-max))
1432 (widen))
1433 )))
1434 (if all (nreverse value-list) (car value-list))
1435 ))
72c0ae01 1436
154b3e39
RS
1437(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
1438 (let ((word-count 0)
1439 (case-fold-search nil)
1440 mixed-case-flag lower-case-flag ;;upper-case-flag
72c0ae01 1441 suffix-flag last-name-comma-flag
154b3e39
RS
1442 ;;cbeg cend
1443 initial
1444 begin-again-flag
1445 drop-this-word-if-trailing-flag
1446 drop-last-word-if-trailing-flag
1447 word-found-flag
1448 this-word-beg last-word-beg
1449 name-beg name-end
1450 name-done-flag
72c0ae01
ER
1451 )
1452 (save-excursion
154b3e39 1453 (set-syntax-table mail-extr-address-text-syntax-table)
e432fc74
RS
1454
1455 ;; Get rid of comments.
1456 (goto-char (point-min))
1457 (while (not (eobp))
1458 ;; Initialize for this iteration of the loop.
1459 (skip-chars-forward "^({[\"'`")
1460 (let ((cbeg (point)))
1461 (set-syntax-table mail-extr-address-text-comment-syntax-table)
d980c402
SM
1462 (if (memq (following-char) '(?\' ?\`))
1463 (search-forward "'" nil 'move
1464 (if (eq ?\' (following-char)) 2 1))
1465 (or (mail-extr-safe-move-sexp 1)
1466 (goto-char (point-max))))
e432fc74
RS
1467 (set-syntax-table mail-extr-address-text-syntax-table)
1468 (when (eq (char-after cbeg) ?\()
1469 ;; Delete the comment itself.
1470 (delete-region cbeg (point))
1471 ;; Canonicalize whitespace where the comment was.
1472 (skip-chars-backward " \t")
1473 (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
1474 (replace-match "")
1475 (setq cbeg (point))
1476 (skip-chars-forward " \t")
1477 (if (bobp)
1478 (delete-region (point) cbeg)
1479 (just-one-space))))))
6c83d99f 1480
154b3e39 1481 ;; This was moved above.
72c0ae01 1482 ;; Fix . used as space
154b3e39
RS
1483 ;; But it belongs here because it occurs not only as
1484 ;; rypens@reks.uia.ac.be (Piet.Rypens)
1485 ;; but also as
1486 ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
1487 ;;(goto-char (point-min))
1488 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
1489 ;; (replace-match "\\1 \\2" t))
1490
d980c402
SM
1491 (unless (search-forward " " nil t)
1492 (goto-char (point-min))
1493 (cond ((search-forward "_" nil t)
1494 ;; Handle the *idiotic* use of underlines as spaces.
1495 ;; Example: fml@foo.bar.dom (First_M._Last)
1496 (goto-char (point-min))
1497 (while (search-forward "_" nil t)
1498 (replace-match " " t)))
1499 ((search-forward "." nil t)
1500 ;; Fix . used as space
1501 ;; Example: danj1@cb.att.com (daniel.jacobson)
1502 (goto-char (point-min))
1503 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1504 (replace-match "\\1 \\2" t)))))
154b3e39 1505
154b3e39 1506 ;; Loop over the words (and other junk) in the name.
72c0ae01 1507 (goto-char (point-min))
154b3e39 1508 (while (not name-done-flag)
6c83d99f 1509
d980c402
SM
1510 (when word-found-flag
1511 ;; Last time through this loop we skipped over a word.
1512 (setq last-word-beg this-word-beg)
1513 (setq drop-last-word-if-trailing-flag
1514 drop-this-word-if-trailing-flag)
1515 (setq word-found-flag nil))
1516
1517 (when begin-again-flag
1518 ;; Last time through the loop we found something that
1519 ;; indicates we should pretend we are beginning again from
1520 ;; the start.
1521 (setq word-count 0)
1522 (setq last-word-beg nil)
1523 (setq drop-last-word-if-trailing-flag nil)
1524 (setq mixed-case-flag nil)
1525 (setq lower-case-flag nil)
1526 ;; (setq upper-case-flag nil)
1527 (setq begin-again-flag nil))
6c83d99f 1528
154b3e39
RS
1529 ;; Initialize for this iteration of the loop.
1530 (mail-extr-skip-whitespace-forward)
1531 (if (eq word-count 0) (narrow-to-region (point) (point-max)))
1532 (setq this-word-beg (point))
1533 (setq drop-this-word-if-trailing-flag nil)
6c83d99f 1534
154b3e39
RS
1535 ;; Decide what to do based on what we are looking at.
1536 (cond
6c83d99f 1537
154b3e39
RS
1538 ;; Delete title
1539 ((and (eq word-count 0)
1540 (looking-at mail-extr-full-name-prefixes))
1541 (goto-char (match-end 0))
1542 (narrow-to-region (point) (point-max)))
6c83d99f 1543
154b3e39
RS
1544 ;; Stop after name suffix
1545 ((and (>= word-count 2)
1546 (looking-at mail-extr-full-name-suffix-pattern))
1547 (mail-extr-skip-whitespace-backward)
1548 (setq suffix-flag (point))
1549 (if (eq ?, (following-char))
1550 (forward-char 1)
1551 (insert ?,))
1552 ;; Enforce at least one space after comma
1553 (or (eq ?\ (following-char))
1554 (insert ?\ ))
1555 (mail-extr-skip-whitespace-forward)
1556 (cond ((memq (following-char) '(?j ?J ?s ?S))
1557 (capitalize-word 1)
1558 (if (eq (following-char) ?.)
1559 (forward-char 1)
1560 (insert ?.)))
1561 (t
1562 (upcase-word 1)))
1563 (setq word-found-flag t)
1564 (setq name-done-flag t))
6c83d99f 1565
154b3e39
RS
1566 ;; Handle SCA names
1567 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
1568 (goto-char (match-beginning 1))
1569 (narrow-to-region (point) (point-max))
1570 (setq begin-again-flag t))
6c83d99f 1571
154b3e39
RS
1572 ;; Check for initial last name followed by comma
1573 ((and (eq ?, (following-char))
1574 (eq word-count 1))
1575 (forward-char 1)
1576 (setq last-name-comma-flag t)
1577 (or (eq ?\ (following-char))
1578 (insert ?\ )))
6c83d99f 1579
154b3e39
RS
1580 ;; Stop before trailing comma-separated comment
1581 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1582 ;; *** This case is redundant???
1583 ;;((eq ?, (following-char))
1584 ;; (setq name-done-flag t))
6c83d99f 1585
154b3e39
RS
1586 ;; Delete parenthesized/quoted comment/nickname
1587 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
1588 (setq cbeg (point))
1589 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1590 (cond ((memq (following-char) '(?\' ?\`))
1591 (or (search-forward "'" nil t
1592 (if (eq ?\' (following-char)) 2 1))
d980c402 1593 (delete-char 1)))
154b3e39
RS
1594 (t
1595 (or (mail-extr-safe-move-sexp 1)
1596 (goto-char (point-max)))))
1597 (set-syntax-table mail-extr-address-text-syntax-table)
1598 (setq cend (point))
72c0ae01 1599 (cond
154b3e39 1600 ;; Handle case of entire name being quoted
72c0ae01 1601 ((and (eq word-count 0)
154b3e39
RS
1602 (looking-at " *\\'")
1603 (>= (- cend cbeg) 2))
1604 (narrow-to-region (1+ cbeg) (1- cend))
1605 (goto-char (point-min)))
1606 (t
1607 ;; Handle case of quoted initial
1608 (if (and (or (= 3 (- cend cbeg))
1609 (and (= 4 (- cend cbeg))
1610 (eq ?. (char-after (+ 2 cbeg)))))
1611 (not (looking-at " *\\'")))
1612 (setq initial (char-after (1+ cbeg)))
1613 (setq initial nil))
1614 (delete-region cbeg cend)
1615 (if initial
1616 (insert initial ". ")))))
6c83d99f 1617
154b3e39
RS
1618 ;; Handle *Stupid* VMS date stamps
1619 ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
1620 (replace-match "" t))
6c83d99f 1621
154b3e39
RS
1622 ;; Handle Chinese characters.
1623 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
1624 (goto-char (match-end 0))
1625 (setq word-found-flag t))
6c83d99f 1626
154b3e39
RS
1627 ;; Skip initial garbage characters.
1628 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1629 ((and (eq word-count 0)
1630 (looking-at mail-extr-leading-garbage))
1631 (goto-char (match-end 0))
1632 ;; *** Skip backward over these???
1633 ;; (skip-chars-backward "& \"")
1634 (narrow-to-region (point) (point-max)))
6c83d99f 1635
154b3e39
RS
1636 ;; Various stopping points
1637 ((or
6c83d99f 1638
154b3e39
RS
1639 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
1640 ;; words. Example: XT-DEM.
1641 (and (>= word-count 2)
1642 mixed-case-flag
1643 (looking-at mail-extr-weird-acronym-pattern)
1644 (not (looking-at mail-extr-roman-numeral-pattern)))
6c83d99f 1645
154b3e39
RS
1646 ;; Stop before trailing alternative address
1647 (looking-at mail-extr-alternative-address-pattern)
6c83d99f 1648
154b3e39
RS
1649 ;; Stop before trailing comment not introduced by comma
1650 ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
1651 (looking-at mail-extr-trailing-comment-start-pattern)
6c83d99f 1652
154b3e39 1653 ;; Stop before telephone numbers
af604656
RS
1654 (and (>= word-count 1)
1655 (looking-at mail-extr-telephone-extension-pattern)))
154b3e39 1656 (setq name-done-flag t))
6c83d99f 1657
154b3e39
RS
1658 ;; Delete ham radio call signs
1659 ((looking-at mail-extr-ham-call-sign-pattern)
1660 (delete-region (match-beginning 0) (match-end 0)))
6c83d99f 1661
154b3e39
RS
1662 ;; Fixup initials
1663 ((looking-at mail-extr-initial-pattern)
1664 (or (eq (following-char) (upcase (following-char)))
1665 (setq lower-case-flag t))
1666 (forward-char 1)
1667 (if (eq ?. (following-char))
1668 (forward-char 1)
1669 (insert ?.))
1670 (or (eq ?\ (following-char))
1671 (insert ?\ ))
1672 (setq word-found-flag t))
6c83d99f 1673
154b3e39
RS
1674 ;; Handle BITNET LISTSERV list names.
1675 ((and (eq word-count 0)
1676 (looking-at mail-extr-listserv-list-name-pattern))
1677 (narrow-to-region (match-beginning 1) (match-end 1))
1678 (setq word-found-flag t)
1679 (setq name-done-flag t))
6c83d99f 1680
524d06f5
RS
1681 ;; Handle & substitution, when & is last and is not first.
1682 ((and (> word-count 0)
1683 (eq ?\ (preceding-char))
1684 (eq (following-char) ?&)
1685 (eq (1+ (point)) (point-max)))
d980c402 1686 (delete-char 1)
524d06f5
RS
1687 (capitalize-region
1688 (point)
1689 (progn
1690 (insert-buffer-substring canonicalization-buffer
1691 mbox-beg mbox-end)
1692 (point)))
1693 (setq disable-initial-guessing-flag t)
1694 (setq word-found-flag t))
1695
1696 ;; Handle & between names, as in "Bob & Susie".
1697 ((and (> word-count 0) (eq (following-char) ?\&))
1698 (setq name-beg (point))
1699 (setq name-end (1+ name-beg))
1700 (setq word-found-flag t)
1701 (goto-char name-end))
1702
154b3e39
RS
1703 ;; Regular name words
1704 ((looking-at mail-extr-name-pattern)
1705 (setq name-beg (point))
1706 (setq name-end (match-end 0))
6c83d99f 1707
154b3e39
RS
1708 ;; Certain words will be dropped if they are at the end.
1709 (and (>= word-count 2)
1710 (not lower-case-flag)
1711 (or
ff9ed544 1712 ;; Trailing 4-or-more letter lowercase words preceded by
154b3e39 1713 ;; mixed case or uppercase words will be dropped.
ff9ed544 1714 (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'")
154b3e39
RS
1715 ;; Drop a trailing word which is terminated with a period.
1716 (eq ?. (char-after (1- name-end))))
1717 (setq drop-this-word-if-trailing-flag t))
6c83d99f 1718
154b3e39
RS
1719 ;; Set the flags that indicate whether we have seen a lowercase
1720 ;; word, a mixed case word, and an uppercase word.
ff9ed544 1721 (if (re-search-forward "[[:lower:]]" name-end t)
154b3e39
RS
1722 (if (progn
1723 (goto-char name-beg)
ff9ed544 1724 (re-search-forward "[[:upper:]]" name-end t))
154b3e39 1725 (setq mixed-case-flag t)
72c0ae01 1726 (setq lower-case-flag t))
154b3e39
RS
1727;; (setq upper-case-flag t)
1728 )
6c83d99f 1729
154b3e39
RS
1730 (goto-char name-end)
1731 (setq word-found-flag t))
72c0ae01 1732
af604656
RS
1733 ;; Allow a number as a word, if it doesn't mean anything else.
1734 ((looking-at "[0-9]+\\>")
1735 (setq name-beg (point))
1736 (setq name-end (match-end 0))
1737 (goto-char name-end)
1738 (setq word-found-flag t))
1739
154b3e39
RS
1740 (t
1741 (setq name-done-flag t)
1742 ))
6c83d99f 1743
154b3e39
RS
1744 ;; Count any word that we skipped over.
1745 (if word-found-flag
1746 (setq word-count (1+ word-count))))
6c83d99f 1747
154b3e39
RS
1748 ;; If the last thing in the name is 2 or more periods, or one or more
1749 ;; other sentence terminators (but not a single period) then keep them
4802af76 1750 ;; and the preceding word. This is for the benefit of whole sentences
154b3e39
RS
1751 ;; in the name field: it's better behavior than dropping the last word
1752 ;; of the sentence...
1753 (if (and (not suffix-flag)
1754 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
1755 (goto-char (setq suffix-flag (point-max))))
1756
1757 ;; Drop everything after point and certain trailing words.
1758 (narrow-to-region (point-min)
1759 (or (and drop-last-word-if-trailing-flag
1760 last-word-beg)
1761 (point)))
6c83d99f 1762
154b3e39
RS
1763 ;; Xerox's mailers SUCK!!!!!!
1764 ;; We simply refuse to believe that any last name is PARC or ADOC.
1765 ;; If it looks like that is the last name, that there is no meaningful
1766 ;; here at all. Actually I guess it would be best to map patterns
1767 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
1768 ;; actually know that that is what's going on.
d980c402
SM
1769 (unless suffix-flag
1770 (goto-char (point-min))
1771 (let ((case-fold-search t))
1772 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
1773 (erase-buffer))))
154b3e39 1774
72c0ae01 1775 ;; If last name first put it at end (but before suffix)
d980c402
SM
1776 (when last-name-comma-flag
1777 (goto-char (point-min))
1778 (search-forward ",")
1779 (setq name-end (1- (point)))
1780 (goto-char (or suffix-flag (point-max)))
1781 (or (eq ?\ (preceding-char))
1782 (insert ?\ ))
1783 (insert-buffer-substring (current-buffer) (point-min) name-end)
1784 (goto-char name-end)
1785 (skip-chars-forward "\t ,")
1786 (narrow-to-region (point) (point-max)))
6c83d99f 1787
154b3e39 1788 ;; Delete leading and trailing junk characters.
4802af76 1789 ;; *** This is probably completely unneeded now.
154b3e39
RS
1790 ;;(goto-char (point-max))
1791 ;;(skip-chars-backward mail-extr-non-end-name-chars)
1792 ;;(if (eq ?. (following-char))
1793 ;; (forward-char 1))
1794 ;;(narrow-to-region (point)
1795 ;; (progn
1796 ;; (goto-char (point-min))
1797 ;; (skip-chars-forward mail-extr-non-begin-name-chars)
1798 ;; (point)))
6c83d99f 1799
72c0ae01
ER
1800 ;; Compress whitespace
1801 (goto-char (point-min))
1802 (while (re-search-forward "[ \t\n]+" nil t)
154b3e39 1803 (replace-match (if (eobp) "" " ") t))
72c0ae01
ER
1804 )))
1805
154b3e39 1806\f
72c0ae01 1807
154b3e39
RS
1808;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1809;;
1810;; Table of top-level domain names.
1811;;
1812;; This is used during address canonicalization; be careful of format changes.
72c0ae01
ER
1813;; Keep in mind that the country abbreviations follow ISO-3166. There is
1814;; a U.S. FIPS that specifies a different set of two-letter country
1815;; abbreviations.
97f83272
SJ
1816;;
1817;; Updated by the RIPE Network Coordination Centre.
1818;;
1819;; Source: ISO 3166 Maintenance Agency
1820;; http://www.iso.org/iso/en/prods-services/iso3166ma/02iso-3166-code-lists/list-en1-semic.txt
1821;; http://www.iana.org/domain-names.htm
1822;; http://www.iana.org/cctld/cctld-whois.htm
1823;; Latest change: Mon Jul 8 14:21:59 CEST 2002
72c0ae01 1824
154b3e39 1825(defconst mail-extr-all-top-level-domains
0b725d8c 1826 (let ((ob (make-vector 739 0)))
d980c402
SM
1827 (mapc
1828 (lambda (x)
1829 (put (intern (downcase (car x)) ob)
1830 'domain-name
1831 (if (nth 2 x)
1832 (format (nth 2 x) (nth 1 x))
1833 (nth 1 x))))
e1a34b58
RS
1834 '(
1835 ;; ISO 3166 codes:
44248b7f 1836 ("ad" "Andorra")
e1a34b58 1837 ("ae" "United Arab Emirates")
0b725d8c 1838 ("af" "Afghanistan")
e1a34b58 1839 ("ag" "Antigua and Barbuda")
0b725d8c 1840 ("ai" "Anguilla")
e1a34b58 1841 ("al" "Albania")
44248b7f 1842 ("am" "Armenia")
0b725d8c 1843 ("an" "Netherlands Antilles")
e1a34b58
RS
1844 ("ao" "Angola")
1845 ("aq" "Antarctica") ; continent
154b3e39 1846 ("ar" "Argentina" "Argentine Republic")
0b725d8c 1847 ("as" "American Samoa")
154b3e39
RS
1848 ("at" "Austria" "The Republic of %s")
1849 ("au" "Australia")
0b725d8c 1850 ("aw" "Aruba")
e1a34b58 1851 ("az" "Azerbaijan")
44248b7f 1852 ("ba" "Bosnia-Herzegovina")
154b3e39 1853 ("bb" "Barbados")
e1a34b58 1854 ("bd" "Bangladesh")
154b3e39 1855 ("be" "Belgium" "The Kingdom of %s")
e1a34b58 1856 ("bf" "Burkina Faso")
154b3e39 1857 ("bg" "Bulgaria")
e1a34b58 1858 ("bh" "Bahrain")
0b725d8c
GM
1859 ("bi" "Burundi")
1860 ("bj" "Benin")
e1a34b58 1861 ("bm" "Bermuda")
0b725d8c 1862 ("bn" "Brunei Darussalam")
154b3e39
RS
1863 ("bo" "Bolivia" "Republic of %s")
1864 ("br" "Brazil" "The Federative Republic of %s")
1865 ("bs" "Bahamas")
0b725d8c
GM
1866 ("bt" "Bhutan")
1867 ("bv" "Bouvet Island")
e1a34b58
RS
1868 ("bw" "Botswana")
1869 ("by" "Belarus")
154b3e39
RS
1870 ("bz" "Belize")
1871 ("ca" "Canada")
0b725d8c 1872 ("cc" "Cocos (Keeling) Islands")
97f83272 1873 ("cd" "Congo" "The Democratic Republic of the %s")
0b725d8c 1874 ("cf" "Central African Republic")
e1a34b58 1875 ("cg" "Congo")
154b3e39 1876 ("ch" "Switzerland" "The Swiss Confederation")
0b725d8c
GM
1877 ("ci" "Ivory Coast") ; Cote D'ivoire
1878 ("ck" "Cook Islands")
154b3e39 1879 ("cl" "Chile" "The Republic of %s")
e1a34b58 1880 ("cm" "Cameroon") ; In .fr domain
154b3e39 1881 ("cn" "China" "The People's Republic of %s")
e1a34b58 1882 ("co" "Colombia")
154b3e39 1883 ("cr" "Costa Rica" "The Republic of %s")
e1a34b58 1884 ("cu" "Cuba")
0b725d8c
GM
1885 ("cv" "Cape Verde")
1886 ("cx" "Christmas Island")
e1a34b58
RS
1887 ("cy" "Cyprus")
1888 ("cz" "Czech Republic")
154b3e39 1889 ("de" "Germany")
0b725d8c 1890 ("dj" "Djibouti")
154b3e39
RS
1891 ("dk" "Denmark")
1892 ("dm" "Dominica")
1893 ("do" "Dominican Republic" "The %s")
e1a34b58 1894 ("dz" "Algeria")
154b3e39 1895 ("ec" "Ecuador" "The Republic of %s")
e1a34b58 1896 ("ee" "Estonia")
154b3e39 1897 ("eg" "Egypt" "The Arab Republic of %s")
0b725d8c 1898 ("eh" "Western Sahara")
e1a34b58 1899 ("er" "Eritrea")
154b3e39 1900 ("es" "Spain" "The Kingdom of %s")
44248b7f 1901 ("et" "Ethiopia")
154b3e39 1902 ("fi" "Finland" "The Republic of %s")
0b725d8c
GM
1903 ("fj" "Fiji")
1904 ("fk" "Falkland Islands (Malvinas)")
1905 ("fm" "Micronesia" "Federated States of %s")
e1a34b58 1906 ("fo" "Faroe Islands")
154b3e39 1907 ("fr" "France")
44248b7f
KH
1908 ("ga" "Gabon")
1909 ("gb" "United Kingdom")
e1a34b58
RS
1910 ("gd" "Grenada")
1911 ("ge" "Georgia")
0b725d8c
GM
1912 ("gf" "French Guiana")
1913 ("gh" "Ghana")
1914 ("gi" "Gibraltar")
44248b7f
KH
1915 ("gl" "Greenland")
1916 ("gm" "Gambia")
0b725d8c 1917 ("gn" "Guinea")
e1a34b58 1918 ("gp" "Guadeloupe (Fr.)")
0b725d8c 1919 ("gq" "Equatorial Guinea")
154b3e39 1920 ("gr" "Greece" "The Hellenic Republic (%s)")
97f83272 1921 ("gs" "South Georgia and The South Sandwich Islands")
e1a34b58
RS
1922 ("gt" "Guatemala")
1923 ("gu" "Guam (U.S.)")
0b725d8c
GM
1924 ("gw" "Guinea-Bissau")
1925 ("gy" "Guyana")
154b3e39 1926 ("hk" "Hong Kong")
97f83272 1927 ("hm" "Heard Island and Mcdonald Islands")
e1a34b58 1928 ("hn" "Honduras")
44248b7f 1929 ("hr" "Croatia" "Croatia (Hrvatska)")
e1a34b58 1930 ("ht" "Haiti")
44248b7f 1931 ("hu" "Hungary" "The Hungarian Republic")
e1a34b58 1932 ("id" "Indonesia")
154b3e39
RS
1933 ("ie" "Ireland")
1934 ("il" "Israel" "The State of %s")
0b725d8c 1935 ("im" "Isle of Man" "The %s") ; NOT in ISO 3166-1 of 2001-02-26
154b3e39 1936 ("in" "India" "The Republic of %s")
0b725d8c
GM
1937 ("io" "British Indian Ocean Territory")
1938 ("iq" "Iraq")
1939 ("ir" "Iran" "Islamic Republic of %s")
154b3e39
RS
1940 ("is" "Iceland" "The Republic of %s")
1941 ("it" "Italy" "The Italian Republic")
1942 ("jm" "Jamaica")
44248b7f 1943 ("jo" "Jordan")
154b3e39 1944 ("jp" "Japan")
e1a34b58 1945 ("ke" "Kenya")
0b725d8c
GM
1946 ("kg" "Kyrgyzstan")
1947 ("kh" "Cambodia")
1948 ("ki" "Kiribati")
1949 ("km" "Comoros")
1950 ("kn" "Saint Kitts and Nevis")
1951 ("kp" "Korea (North)" "Democratic People's Republic of Korea")
1952 ("kr" "Korea (South)" "Republic of Korea")
e1a34b58 1953 ("kw" "Kuwait")
0b725d8c 1954 ("ky" "Cayman Islands")
97f83272 1955 ("kz" "Kazakhstan")
0b725d8c 1956 ("la" "Lao People's Democratic Republic")
e1a34b58 1957 ("lb" "Lebanon")
0b725d8c 1958 ("lc" "Saint Lucia")
e1a34b58 1959 ("li" "Liechtenstein")
154b3e39 1960 ("lk" "Sri Lanka" "The Democratic Socialist Republic of %s")
0b725d8c 1961 ("lr" "Liberia")
e1a34b58
RS
1962 ("ls" "Lesotho")
1963 ("lt" "Lithuania")
1964 ("lu" "Luxembourg")
1965 ("lv" "Latvia")
0b725d8c 1966 ("ly" "Libyan Arab Jamahiriya")
e1a34b58 1967 ("ma" "Morocco")
44248b7f
KH
1968 ("mc" "Monaco")
1969 ("md" "Moldova" "The Republic of %s")
e1a34b58 1970 ("mg" "Madagascar")
0b725d8c
GM
1971 ("mh" "Marshall Islands")
1972 ("mk" "Macedonia" "The Former Yugoslav Republic of %s")
e1a34b58 1973 ("ml" "Mali")
0b725d8c
GM
1974 ("mm" "Myanmar")
1975 ("mn" "Mongolia")
97f83272 1976 ("mo" "Macao")
0b725d8c
GM
1977 ("mp" "Northern Mariana Islands")
1978 ("mq" "Martinique")
1979 ("mr" "Mauritania")
1980 ("ms" "Montserrat")
e1a34b58
RS
1981 ("mt" "Malta")
1982 ("mu" "Mauritius")
44248b7f 1983 ("mv" "Maldives")
e1a34b58 1984 ("mw" "Malawi")
154b3e39 1985 ("mx" "Mexico" "The United Mexican States")
97f83272 1986 ("my" "Malaysia")
e1a34b58 1987 ("mz" "Mozambique")
154b3e39 1988 ("na" "Namibia")
e1a34b58
RS
1989 ("nc" "New Caledonia (Fr.)")
1990 ("ne" "Niger") ; In .fr domain
0b725d8c
GM
1991 ("nf" "Norfolk Island")
1992 ("ng" "Nigeria")
154b3e39
RS
1993 ("ni" "Nicaragua" "The Republic of %s")
1994 ("nl" "Netherlands" "The Kingdom of the %s")
1995 ("no" "Norway" "The Kingdom of %s")
e1a34b58 1996 ("np" "Nepal") ; Via .in domain
0b725d8c 1997 ("nr" "Nauru")
44248b7f 1998 ("nu" "Niue")
154b3e39 1999 ("nz" "New Zealand")
0b725d8c 2000 ("om" "Oman")
e1a34b58 2001 ("pa" "Panama")
154b3e39 2002 ("pe" "Peru")
97f83272 2003 ("pf" "French Polynesia")
154b3e39
RS
2004 ("pg" "Papua New Guinea")
2005 ("ph" "Philippines" "The Republic of the %s")
e1a34b58 2006 ("pk" "Pakistan")
154b3e39 2007 ("pl" "Poland")
0b725d8c
GM
2008 ("pm" "Saint Pierre and Miquelon")
2009 ("pn" "Pitcairn")
e1a34b58 2010 ("pr" "Puerto Rico (U.S.)")
0b725d8c 2011 ("ps" "Palestinian Territory, Occupied")
4802af76 2012 ("pt" "Portugal" "The Portuguese Republic")
0b725d8c 2013 ("pw" "Palau")
154b3e39 2014 ("py" "Paraguay")
44248b7f 2015 ("qa" "Qatar")
e1a34b58
RS
2016 ("re" "Reunion (Fr.)") ; In .fr domain
2017 ("ro" "Romania")
0b725d8c
GM
2018 ("ru" "Russia" "Russian Federation")
2019 ("rw" "Rwanda")
e1a34b58 2020 ("sa" "Saudi Arabia")
0b725d8c 2021 ("sb" "Solomon Islands")
e1a34b58
RS
2022 ("sc" "Seychelles")
2023 ("sd" "Sudan")
154b3e39
RS
2024 ("se" "Sweden" "The Kingdom of %s")
2025 ("sg" "Singapore" "The Republic of %s")
0b725d8c 2026 ("sh" "Saint Helena")
e1a34b58 2027 ("si" "Slovenia")
0b725d8c 2028 ("sj" "Svalbard and Jan Mayen") ; In .no domain
e1a34b58 2029 ("sk" "Slovakia" "The Slovak Republic")
0b725d8c 2030 ("sl" "Sierra Leone")
44248b7f 2031 ("sm" "San Marino")
e1a34b58 2032 ("sn" "Senegal")
0b725d8c 2033 ("so" "Somalia")
154b3e39 2034 ("sr" "Suriname")
97f83272 2035 ("st" "Sao Tome and Principe")
0b725d8c
GM
2036 ("su" "U.S.S.R." "The Union of Soviet Socialist Republics")
2037 ("sv" "El Salvador")
2038 ("sy" "Syrian Arab Republic")
e1a34b58 2039 ("sz" "Swaziland")
97f83272 2040 ("tc" "Turks and Caicos Islands")
0b725d8c
GM
2041 ("td" "Chad")
2042 ("tf" "French Southern Territories")
e1a34b58 2043 ("tg" "Togo")
154b3e39 2044 ("th" "Thailand" "The Kingdom of %s")
0b725d8c
GM
2045 ("tj" "Tajikistan")
2046 ("tk" "Tokelau")
97f83272 2047 ("tl" "East Timor")
0b725d8c 2048 ("tm" "Turkmenistan")
154b3e39 2049 ("tn" "Tunisia")
44248b7f 2050 ("to" "Tonga")
0b725d8c 2051 ("tp" "East Timor")
154b3e39
RS
2052 ("tr" "Turkey" "The Republic of %s")
2053 ("tt" "Trinidad and Tobago")
0b725d8c
GM
2054 ("tv" "Tuvalu")
2055 ("tw" "Taiwan" "%s, Province of China")
2056 ("tz" "Tanzania" "United Republic of %s")
e1a34b58 2057 ("ua" "Ukraine")
0b725d8c 2058 ("ug" "Uganda")
44a1338a 2059 ("uk" "United Kingdom" "The %s of Great Britain and Northern Ireland")
0b725d8c 2060 ("um" "United States Minor Outlying Islands")
e1a34b58 2061 ("us" "United States" "The %s of America")
154b3e39 2062 ("uy" "Uruguay" "The Eastern Republic of %s")
0b725d8c
GM
2063 ("uz" "Uzbekistan")
2064 ("va" "Holy See (Vatican City State)")
97f83272 2065 ("vc" "Saint Vincent and the Grenadines")
154b3e39 2066 ("ve" "Venezuela" "The Republic of %s")
0b725d8c
GM
2067 ("vg" "Virgin Islands, British")
2068 ("vi" "Virgin Islands, U.S.")
e1a34b58
RS
2069 ("vn" "Vietnam")
2070 ("vu" "Vanuatu")
0b725d8c
GM
2071 ("wf" "Wallis and Futuna")
2072 ("ws" "Samoa")
2073 ("ye" "Yemen")
2074 ("yt" "Mayotte")
44248b7f
KH
2075 ("yu" "Yugoslavia" "Yugoslavia, AKA Serbia-Montenegro")
2076 ("za" "South Africa" "The Republic of %s")
0b725d8c 2077 ("zm" "Zambia")
154b3e39 2078 ("zw" "Zimbabwe" "Republic of %s")
97f83272
SJ
2079 ;; Generic Domains:
2080 ("aero" t "Air Transport Industry")
2081 ("biz" t "Businesses")
e1a34b58 2082 ("com" t "Commercial")
97f83272
SJ
2083 ("coop" t "Cooperative Associations")
2084 ("info" t "Info")
2085 ("museum" t "Museums")
2086 ("name" t "Individuals")
e1a34b58
RS
2087 ("net" t "Network")
2088 ("org" t "Non-profit Organization")
97f83272
SJ
2089 ;;("pro" t "Credentialed professionals")
2090 ;;("bitnet" t "Because It's Time NET")
2091 ("gov" t "United States Government")
2092 ("edu" t "Educational")
2093 ("mil" t "United States Military")
2094 ("int" t "International Treaties")
2095 ;;("nato" t "North Atlantic Treaty Organization")
e1a34b58 2096 ("uucp" t "Unix to Unix CoPy")
97f83272
SJ
2097 ;; Infrastructure Domains:
2098 ("arpa" t "Advanced Research Projects Agency (U.S. DoD)")
154b3e39
RS
2099 ))
2100 ob))
2101
2102;;;###autoload
2103(defun what-domain (domain)
e1cd65fd 2104 "Convert mail domain DOMAIN to the country it corresponds to."
154b3e39
RS
2105 (interactive
2106 (let ((completion-ignore-case t))
2107 (list (completing-read "Domain: "
2108 mail-extr-all-top-level-domains nil t))))
2109 (or (setq domain (intern-soft (downcase domain)
2110 mail-extr-all-top-level-domains))
29565a87 2111 (error "No such domain"))
154b3e39 2112 (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name)))
72c0ae01
ER
2113
2114\f
154b3e39
RS
2115;(let ((all nil))
2116; (mapatoms #'(lambda (x)
6c83d99f 2117; (if (and (boundp x)
154b3e39
RS
2118; (string-match "^mail-extr-" (symbol-name x)))
2119; (setq all (cons x all)))))
2120; (setq all (sort all #'string-lessp))
2121; (cons 'setq
2122; (apply 'nconc (mapcar #'(lambda (x)
2123; (list x (symbol-value x)))
2124; all))))
72c0ae01
ER
2125
2126\f
154b3e39 2127(provide 'mail-extr)
72c0ae01 2128
ab5796a9 2129;;; arch-tag: 7785fade-1073-4ed6-b4f6-28db34a7982d
72c0ae01 2130;;; mail-extr.el ends here