*** empty log message ***
[bpt/emacs.git] / lisp / mail / mail-extr.el
CommitLineData
72c0ae01
ER
1;;; mail-extr.el --- extract full name and address from RFC 822 mail header.
2
3a801d0c
ER
3;; Copyright (C) 1992 Free Software Foundation, Inc.
4
72c0ae01 5;; Author: Joe Wells <jbw@cs.bu.edu>
72c0ae01
ER
6;; Version: 1.0
7;; Adapted-By: ESR
8;; Keywords: mail
9
72c0ae01
ER
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 1, or (at your option)
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs; see the file COPYING. If not, write to
24;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
25
26;;; Commentary:
27
28;; Here is `mail-extr', a package for extracting full names and canonical
29;; addresses from RFC 822 mail headers. It is intended to be hooked into
30;; other Emacs Lisp packages that deal with RFC 822 format messages, such as
31;; Gnews, GNUS, RMAIL, MH-E, BBDB, VM, Supercite, etc. Thus, this release is
32;; mainly for Emacs Lisp developers.
33
34;; There are two main benefits:
35
36;; 1. Higher probability of getting the correct full name for a human than
37;; any other package I know of. (On the other hand, it will cheerfully
38;; mangle non-human names/comments.)
39;; 2. Address part is put in a canonical form.
40
41;; The interface is not yet carved in stone; please give me suggestions.
42
43;; I have an extensive test-case collection of funny addresses if you want to
44;; work with the code. Developing this code requires frequent testing to
45;; make sure you're not breaking functionality. I'm not posting the
46;; test-cases because they take over 100K.
47
48;; If you find an address that mail-extr fails on, please send it to me along
49;; with what you think the correct results should be. I do not consider it a
50;; bug if mail-extr mangles a comment that does not correspond to a real
51;; human full name, although I would prefer that mail-extr would return the
52;; comment as-is.
53
54;; Features:
55
56;; * Full name handling:
57
58;; * knows where full names can be found in an address.
59;; * avoids using empty comments and quoted text.
60;; * extracts full names from mailbox names.
61;; * recognizes common formats for comments after a full name.
62;; * puts a period and a space after each initial.
63;; * understands & referring to the mailbox name capitalized.
64;; * strips name prefixes like "Prof.", etc..
65;; * understands what characters can occur in names (not just letters).
66;; * figures out middle initial from mailbox name.
67;; * removes funny nicknames.
68;; * keeps suffixes such as Jr., Sr., III, etc.
69;; * reorders "Last, First" type names.
70
71;; * Address handling:
72
73;; * parses rfc822 quoted text, comments, and domain literals.
74;; * parses rfc822 multi-line headers.
75;; * does something reasonable with rfc822 GROUP addresses.
76;; * handles many rfc822 noncompliant and garbage addresses.
77;; * canonicalizes addresses (after stripping comments/phrases outside <>).
78;; * converts ! addresses into .UUCP and %-style addresses.
79;; * converts rfc822 ROUTE addresses to %-style addresses.
80;; * truncates %-style addresses at leftmost fully qualified domain name.
81;; * handles local relative precedence of ! vs. % and @ (untested).
82
83;; It does almost no string creation. It primarily uses the built-in
84;; parsing routines with the appropriate syntax tables. This should
85;; result in greater speed.
86
87;; TODO:
88
89;; * handle all test cases. (This will take forever.)
90;; * software to pick the correct header to use (eg., "Senders-Name:").
91;; * multiple addresses in the "From:" header (almost all of the necessary
92;; code is there).
93;; * flag to not treat `,' as an address separator. (This is useful when
94;; there is a "From:" header but no "Sender:" header, because then there
95;; is only allowed to be one address.)
96;; * mailbox name does not necessarily contain full name.
97;; * fixing capitalization when it's all upper or lowercase. (Hard!)
98;; * some of the domain literal handling is missing. (But I've never even
99;; seen one of these in a mail address, so maybe no big deal.)
100;; * arrange to have syntax tables byte-compiled.
101;; * speed hacks.
102;; * delete unused variables.
103;; * arrange for testing with different relative precedences of ! vs. @
104;; and %.
105;; * put variant-method back into mail-extract-address-components.
106;; * insert documentation strings!
107;; * handle X.400-gatewayed addresses according to RFC 1148.
108
109;;; Change Log:
110;;
111;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
112;;
113;; * Cleaned up some more. Release version 1.0 to world.
114;;
115;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu)
116;;
117;; * Cleaned up full name extraction extensively.
118;;
119;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu)
120;;
121;; * Total rewrite. Integrated mail-canonicalize-address into
122;; mail-extract-address-components. Now handles GROUP addresses more
123;; or less correctly. Better handling of lots of different cases.
124;;
125;; Fri Jun 14 19:39:50 1991
126;; * Created.
127
128;;; Code:
129\f
130;; Variable definitions.
131
132(defvar mail-@-binds-tighter-than-! nil)
133
134;;----------------------------------------------------------------------
135;; what orderings are meaningful?????
136;;(defvar mail-operator-precedence-list '(?! ?% ?@))
137;; Right operand of a % or a @ must be a domain name, period. No other
138;; operators allowed. Left operand of a @ is an address relative to that
139;; site.
140
141;; Left operand of a ! must be a domain name. Right operand is an
142;; arbitrary address.
143;;----------------------------------------------------------------------
144
145(defconst mail-space-char 32)
146
147(defconst mail-whitespace " \t\n")
148
149;; Any character that can occur in a name in an RFC822 address.
150;; Yes, there are weird people with digits in their names.
151(defconst mail-all-letters "A-Za-z---{|}'~0-9`.")
152
153;; Any character that can occur in a name, not counting characters that
154;; separate parts of a multipart name.
155(defconst mail-all-letters-but-separators "A-Za-z{|}'~0-9`")
156
157;; Any character that can start a name
158(defconst mail-first-letters "A-Za-z")
159
160;; Any character that can end a name.
161(defconst mail-last-letters "A-Za-z`'.")
162
163;; Matches an initial not followed by both a period and a space.
164(defconst mail-bad-initials-pattern
165 (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
166 mail-all-letters mail-first-letters mail-all-letters))
167
168(defconst mail-non-name-chars (concat "^" mail-all-letters "."))
169
170(defconst mail-non-begin-name-chars (concat "^" mail-first-letters))
171
172(defconst mail-non-end-name-chars (concat "^" mail-last-letters))
173
174;; Matches periods used instead of spaces. Must not match the period
175;; following an initial.
176(defconst mail-bad-\.-pattern
177 (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
178 mail-all-letters mail-last-letters mail-first-letters))
179
180;; Matches an embedded or leading nickname that should be removed.
181(defconst mail-nickname-pattern
182 (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
183 mail-all-letters))
184
185;; Matches a leading title that is not part of the name (does not
186;; contribute to uniquely identifying the person).
187(defconst mail-full-name-prefixes
188 '"\\` *\\(Prof\\|Dr\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.? ")
189
190;; Matches the occurrence of a generational name suffix, and the last
191;; character of the preceding name.
192(defconst mail-full-name-suffix-pattern
193 (format
194 "\\(,? ?\\([JjSs]r\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
195 mail-all-letters mail-all-letters))
196
197(defconst mail-roman-numeral-pattern
198 "V?I+V?\\b")
199
200;; Matches a trailing uppercase (with other characters possible) acronym.
201;; Must not match a trailing uppercase last name or trailing initial
202(defconst mail-weird-acronym-pattern "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")
203
204;; Matches a mixed-case or lowercase name (not an initial).
205(defconst mail-mixed-case-name-pattern
206 (format
207 "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
208 mail-all-letters mail-last-letters
209 mail-first-letters mail-all-letters mail-all-letters mail-last-letters
210 mail-first-letters mail-all-letters))
211
212;; Matches a trailing alternative address.
213(defconst mail-alternative-address-pattern "[a-zA-Z.]+[!@][a-zA-Z.]")
214
215;; Matches a variety of trailing comments not including comma-delimited
216;; comments.
217(defconst mail-trailing-comment-start-pattern " [-{]\\|--\\|[+@#></\;]")
218
219;; Matches a name (not an initial).
220;; This doesn't force a word boundary at the end because sometimes a
221;; comment is separated by a `-' with no preceding space.
222(defconst mail-name-pattern
223 (format
224 "\\b[%s][%s]*[%s]"
225 mail-first-letters mail-all-letters mail-last-letters))
226
227(defconst mail-initial-pattern
228 (format "\\b[%s]\\([. ]\\|\\b\\)" mail-first-letters))
229
230;; Matches a single name before a comma.
231(defconst mail-last-name-first-pattern
232 (concat "\\`" mail-name-pattern ","))
233
234;; Matches telephone extensions.
235(defconst mail-telephone-extension-pattern
236 "\\(\\([Ee]xt\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+")
237
238;; Matches ham radio call signs.
239(defconst mail-ham-call-sign-pattern
240 "\\b[A-Z]+[0-9][A-Z0-9]*")
241
242;; Matches normal single-part name
243(defconst mail-normal-name-pattern
244 (format
245 "\\b[%s][%s]+[%s]"
246 mail-first-letters mail-all-letters-but-separators mail-last-letters))
247
248;; Matches normal two names with missing middle initial
249(defconst mail-two-name-pattern
250 (concat "\\`\\(" mail-normal-name-pattern
251 "\\|" mail-initial-pattern
252 "\\) +\\(" mail-normal-name-pattern "\\)\\(,\\|\\'\\)"))
253
254(defvar address-syntax-table (make-syntax-table))
255(defvar address-comment-syntax-table (make-syntax-table))
256(defvar address-domain-literal-syntax-table (make-syntax-table))
257(defvar address-text-comment-syntax-table (make-syntax-table))
258(defvar address-text-syntax-table (make-syntax-table))
259(mapcar
260 (function
261 (lambda (pair)
262 (let ((syntax-table (symbol-value (car pair))))
263 (mapcar
264 (function
265 (lambda (item)
266 (if (eq 2 (length item))
267 (modify-syntax-entry (car item) (car (cdr item)) syntax-table)
268 (let ((char (car item))
269 (bound (car (cdr item)))
270 (syntax (car (cdr (cdr item)))))
271 (while (<= char bound)
272 (modify-syntax-entry char syntax syntax-table)
273 (setq char (1+ char)))))))
274 (cdr pair)))))
275 '((address-syntax-table
276 (0 31 "w") ;control characters
277 (32 " ") ;SPC
278 (?! ?~ "w") ;printable characters
279 (127 "w") ;DEL
280 (128 255 "w") ;high-bit-on characters
281 (?\t " ")
282 (?\r " ")
283 (?\n " ")
284 (?\( ".")
285 (?\) ".")
286 (?< ".")
287 (?> ".")
288 (?@ ".")
289 (?, ".")
290 (?\; ".")
291 (?: ".")
292 (?\\ "\\")
293 (?\" "\"")
294 (?. ".")
295 (?\[ ".")
296 (?\] ".")
297 ;; % and ! aren't RFC822 characters, but it is convenient to pretend
298 (?% ".")
299 (?! ".")
300 )
301 (address-comment-syntax-table
302 (0 255 "w")
303 (?\( "\(\)")
304 (?\) "\)\(")
305 (?\\ "\\"))
306 (address-domain-literal-syntax-table
307 (0 255 "w")
308 (?\[ "\(\]") ;??????
309 (?\] "\)\[") ;??????
310 (?\\ "\\"))
311 (address-text-comment-syntax-table
312 (0 255 "w")
313 (?\( "\(\)")
314 (?\) "\)\(")
315 (?\[ "\(\]")
316 (?\] "\)\[")
317 (?\{ "\(\}")
318 (?\} "\)\{")
319 (?\\ "\\")
320 (?\" "\"")
321 ;; (?\' "\)\`")
322 ;; (?\` "\(\'")
323 )
324 (address-text-syntax-table
325 (0 255 ".")
326 (?A ?Z "w")
327 (?a ?z "w")
328 (?- "w")
329 (?\} "w")
330 (?\{ "w")
331 (?| "w")
332 (?\' "w")
333 (?~ "w")
334 (?0 ?9 "w"))
335 ))
336
337\f
338;; Utility functions and macros.
339
340(defmacro undo-backslash-quoting (beg end)
341 (`(save-excursion
342 (save-restriction
343 (narrow-to-region (, beg) (, end))
344 (goto-char (point-min))
345 ;; undo \ quoting
346 (while (re-search-forward "\\\\\\(.\\)" nil t)
347 (replace-match "\\1")
348 ;; CHECK: does this leave point after the replacement?
349 )))))
350
351(defmacro mail-nuke-char-at (pos)
352 (` (save-excursion
353 (goto-char (, pos))
354 (delete-char 1)
355 (insert mail-space-char))))
356
357(defmacro mail-nuke-elements-outside-range (list-symbol beg-symbol end-symbol
358 &optional no-replace)
359 (` (progn
360 (setq temp (, list-symbol))
361 (while temp
362 (cond ((or (> (car temp) (, end-symbol))
363 (< (car temp) (, beg-symbol)))
364 (, (or no-replace
365 (` (mail-nuke-char-at (car temp)))))
366 (setcar temp nil)))
367 (setq temp (cdr temp)))
368 (setq (, list-symbol) (delq nil (, list-symbol))))))
369
370(defun mail-demarkerize (marker)
371 (and marker
372 (if (markerp marker)
373 (let ((temp (marker-position marker)))
374 (set-marker marker nil)
375 temp)
376 marker)))
377
378(defun mail-markerize (pos)
379 (and pos
380 (if (markerp pos)
381 pos
382 (copy-marker pos))))
383
384(defmacro mail-last-element (list)
385 "Return last element of LIST."
386 (` (let ((list (, list)))
387 (while (not (null (cdr list)))
388 (setq list (cdr list)))
389 (car list))))
390
391(defmacro safe-move-sexp (arg)
392 "Safely skip over one balanced sexp, if there is one. Return t if success."
393 (` (condition-case error
394 (progn
395 (goto-char (scan-sexps (point) (, arg)))
396 t)
397 (error
398 (if (string-equal (nth 1 error) "Unbalanced parentheses")
399 nil
400 (while t
401 (signal (car error) (cdr error))))))))
402
403\f
404;; The main function to grind addresses
405
406(defun mail-extract-address-components (address)
407 "Given an rfc 822 ADDRESS, extract full name and canonical address.
408Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
409 (let ((canonicalization-buffer (get-buffer-create "*canonical address*"))
410 (extraction-buffer (get-buffer-create "*extract address components*"))
411 (foo 'bar)
412 char
413 multiple-addresses
414 <-pos >-pos @-pos :-pos ,-pos !-pos %-pos \;-pos
415 group-:-pos group-\;-pos route-addr-:-pos
416 record-pos-symbol
417 first-real-pos last-real-pos
418 phrase-beg phrase-end
419 comment-beg comment-end
420 quote-beg quote-end
421 atom-beg atom-end
422 mbox-beg mbox-end
423 \.-ends-name
424 temp
425 name-suffix
426 saved-point
427 fi mi li
428 saved-%-pos saved-!-pos saved-@-pos
429 domain-pos \.-pos insert-point)
430
431 (save-excursion
432 (set-buffer extraction-buffer)
433 (buffer-flush-undo extraction-buffer)
434 (set-syntax-table address-syntax-table)
435 (widen)
436 (erase-buffer)
437 (setq case-fold-search nil)
438
439 ;; Insert extra space at beginning to allow later replacement with <
440 ;; without having to move markers.
441 (insert mail-space-char address)
442
443 ;; stolen from rfc822.el
444 ;; Unfold multiple lines.
445 (goto-char (point-min))
446 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
447 (replace-match "\\1 " t))
448
449 ;; first pass grabs useful information about address
450 (goto-char (point-min))
451 (while (progn
452 (skip-chars-forward mail-whitespace)
453 (not (eobp)))
454 (setq char (char-after (point)))
455 (or first-real-pos
456 (if (not (eq char ?\())
457 (setq first-real-pos (point))))
458 (cond
459 ;; comment
460 ((eq char ?\()
461 (set-syntax-table address-comment-syntax-table)
462 ;; only record the first non-empty comment's position
463 (if (and (not comment-beg)
464 (save-excursion
465 (forward-char 1)
466 (skip-chars-forward mail-whitespace)
467 (not (eq ?\) (char-after (point))))))
468 (setq comment-beg (point)))
469 ;; TODO: don't record if unbalanced
470 (or (safe-move-sexp 1)
471 (forward-char 1))
472 (set-syntax-table address-syntax-table)
473 (if (and comment-beg
474 (not comment-end))
475 (setq comment-end (point))))
476 ;; quoted text
477 ((eq char ?\")
478 ;; only record the first non-empty quote's position
479 (if (and (not quote-beg)
480 (save-excursion
481 (forward-char 1)
482 (skip-chars-forward mail-whitespace)
483 (not (eq ?\" (char-after (point))))))
484 (setq quote-beg (point)))
485 ;; TODO: don't record if unbalanced
486 (or (safe-move-sexp 1)
487 (forward-char 1))
488 (if (and quote-beg
489 (not quote-end))
490 (setq quote-end (point))))
491 ;; domain literals
492 ((eq char ?\[)
493 (set-syntax-table address-domain-literal-syntax-table)
494 (or (safe-move-sexp 1)
495 (forward-char 1))
496 (set-syntax-table address-syntax-table))
497 ;; commas delimit addresses when outside < > pairs.
498 ((and (eq char ?,)
499 (or (null <-pos)
500 (and >-pos
501 ;; handle weird munged addresses
502 (> (mail-last-element <-pos) (car >-pos)))))
503 (setq multiple-addresses t)
504 (delete-char 1)
505 (narrow-to-region (point-min) (point)))
506 ;; record the position of various interesting chars, determine
507 ;; legality later.
508 ((setq record-pos-symbol
509 (cdr (assq char
510 '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
511 (?: . :-pos) (?, . ,-pos) (?! . !-pos)
512 (?% . %-pos) (?\; . \;-pos)))))
513 (set record-pos-symbol
514 (cons (point) (symbol-value record-pos-symbol)))
515 (forward-char 1))
516 ((eq char ?.)
517 (forward-char 1))
518 ((memq char '(
519 ;; comment terminator illegal
520 ?\)
521 ;; domain literal terminator illegal
522 ?\]
523 ;; \ allowed only within quoted strings,
524 ;; domain literals, and comments
525 ?\\
526 ))
527 (mail-nuke-char-at (point))
528 (forward-char 1))
529 (t
530 (forward-word 1)))
531 (or (eq char ?\()
532 (setq last-real-pos (point))))
533
534 ;; Use only the leftmost <, if any. Replace all others with spaces.
535 (while (cdr <-pos)
536 (mail-nuke-char-at (car <-pos))
537 (setq <-pos (cdr <-pos)))
538
539 ;; Use only the rightmost >, if any. Replace all others with spaces.
540 (while (cdr >-pos)
541 (mail-nuke-char-at (nth 1 >-pos))
542 (setcdr >-pos (nthcdr 2 >-pos)))
543
544 ;; If multiple @s and a :, but no < and >, insert around buffer.
545 ;; This commonly happens on the UUCP "From " line. Ugh.
546 (cond ((and (> (length @-pos) 1)
547 :-pos ;TODO: check if between @s
548 (not <-pos))
549 (goto-char (point-min))
550 (delete-char 1)
551 (setq <-pos (list (point)))
552 (insert ?<)))
553
554 ;; If < but no >, insert > in rightmost possible position
555 (cond ((and <-pos
556 (null >-pos))
557 (goto-char (point-max))
558 (setq >-pos (list (point)))
559 (insert ?>)))
560
561 ;; If > but no <, replace > with space.
562 (cond ((and >-pos
563 (null <-pos))
564 (mail-nuke-char-at (car >-pos))
565 (setq >-pos nil)))
566
567 ;; Turn >-pos and <-pos into non-lists
568 (setq >-pos (car >-pos)
569 <-pos (car <-pos))
570
571 ;; Trim other punctuation lists of items outside < > pair to handle
572 ;; stupid MTAs.
573 (cond (<-pos ; don't need to check >-pos also
574 ;; handle bozo software that violates RFC 822 by sticking
575 ;; punctuation marks outside of a < > pair
576 (mail-nuke-elements-outside-range @-pos <-pos >-pos t)
577 ;; RFC 822 says nothing about these two outside < >, but
578 ;; remove those positions from the lists to make things
579 ;; easier.
580 (mail-nuke-elements-outside-range !-pos <-pos >-pos t)
581 (mail-nuke-elements-outside-range %-pos <-pos >-pos t)))
582
583 ;; Check for : that indicates GROUP list and for : part of
584 ;; ROUTE-ADDR spec.
585 ;; Can't possibly be more than two :. Nuke any extra.
586 (while :-pos
587 (setq temp (car :-pos)
588 :-pos (cdr :-pos))
589 (cond ((and <-pos >-pos
590 (> temp <-pos)
591 (< temp >-pos))
592 (if (or route-addr-:-pos
593 (< (length @-pos) 2)
594 (> temp (car @-pos))
595 (< temp (nth 1 @-pos)))
596 (mail-nuke-char-at temp)
597 (setq route-addr-:-pos temp)))
598 ((or (not <-pos)
599 (and <-pos
600 (< temp <-pos)))
601 (setq group-:-pos temp))))
602
603 ;; Nuke any ; that is in or to the left of a < > pair or to the left
604 ;; of a GROUP starting :. Also, there may only be one ;.
605 (while \;-pos
606 (setq temp (car \;-pos)
607 \;-pos (cdr \;-pos))
608 (cond ((and <-pos >-pos
609 (> temp <-pos)
610 (< temp >-pos))
611 (mail-nuke-char-at temp))
612 ((and (or (not group-:-pos)
613 (> temp group-:-pos))
614 (not group-\;-pos))
615 (setq group-\;-pos temp))))
616
617 ;; Handle junk like ";@host.company.dom" that sendmail adds.
618 ;; **** should I remember comment positions?
619 (and group-\;-pos
620 ;; this is fine for now
621 (mail-nuke-elements-outside-range !-pos group-:-pos group-\;-pos t)
622 (mail-nuke-elements-outside-range @-pos group-:-pos group-\;-pos t)
623 (mail-nuke-elements-outside-range %-pos group-:-pos group-\;-pos t)
624 (mail-nuke-elements-outside-range ,-pos group-:-pos group-\;-pos t)
625 (and last-real-pos
626 (> last-real-pos (1+ group-\;-pos))
627 (setq last-real-pos (1+ group-\;-pos)))
628 (and comment-end
629 (> comment-end group-\;-pos)
630 (setq comment-end nil
631 comment-beg nil))
632 (and quote-end
633 (> quote-end group-\;-pos)
634 (setq quote-end nil
635 quote-beg nil))
636 (narrow-to-region (point-min) group-\;-pos))
637
638 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
639 ;; others.
640 ;; Hell, go ahead an nuke all of the commas.
641 ;; **** This will cause problems when we start handling commas in
642 ;; the PHRASE part .... no it won't ... yes it will ... ?????
643 (mail-nuke-elements-outside-range ,-pos 1 1)
644
645 ;; can only have multiple @s inside < >. The fact that some MTAs
646 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
647 ;; handled above.
648
649 ;; Locate PHRASE part of ROUTE-ADDR.
650 (cond (<-pos
651 (goto-char <-pos)
652 (skip-chars-backward mail-whitespace)
653 (setq phrase-end (point))
654 (goto-char (or ;;group-:-pos
655 (point-min)))
656 (skip-chars-forward mail-whitespace)
657 (if (< (point) phrase-end)
658 (setq phrase-beg (point))
659 (setq phrase-end nil))))
660
661 ;; handle ROUTE-ADDRS with real ROUTEs.
662 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
663 ;; any % or ! must be semantically meaningless.
664 ;; TODO: do this processing into canonicalization buffer
665 (cond (route-addr-:-pos
666 (setq !-pos nil
667 %-pos nil
668 >-pos (copy-marker >-pos)
669 route-addr-:-pos (copy-marker route-addr-:-pos))
670 (goto-char >-pos)
671 (insert-before-markers ?X)
672 (goto-char (car @-pos))
673 (while (setq @-pos (cdr @-pos))
674 (delete-char 1)
675 (setq %-pos (cons (point-marker) %-pos))
676 (insert "%")
677 (goto-char (1- >-pos))
678 (save-excursion
679 (insert-buffer-substring extraction-buffer
680 (car @-pos) route-addr-:-pos)
681 (delete-region (car @-pos) route-addr-:-pos))
682 (or (cdr @-pos)
683 (setq saved-@-pos (list (point)))))
684 (setq @-pos saved-@-pos)
685 (goto-char >-pos)
686 (delete-char -1)
687 (mail-nuke-char-at route-addr-:-pos)
688 (mail-demarkerize route-addr-:-pos)
689 (setq route-addr-:-pos nil
690 >-pos (mail-demarkerize >-pos)
691 %-pos (mapcar 'mail-demarkerize %-pos))))
692
693 ;; de-listify @-pos
694 (setq @-pos (car @-pos))
695
696 ;; TODO: remove comments in the middle of an address
697
698 (set-buffer canonicalization-buffer)
699
700 (buffer-flush-undo canonicalization-buffer)
701 (set-syntax-table address-syntax-table)
702 (setq case-fold-search nil)
703
704 (widen)
705 (erase-buffer)
706 (insert-buffer-substring extraction-buffer)
707
708 (if <-pos
709 (narrow-to-region (progn
710 (goto-char (1+ <-pos))
711 (skip-chars-forward mail-whitespace)
712 (point))
713 >-pos)
714 ;; ****** Oh no! What if the address is completely empty!
715 (narrow-to-region first-real-pos last-real-pos))
716
717 (and @-pos %-pos
718 (mail-nuke-elements-outside-range %-pos (point-min) @-pos))
719 (and %-pos !-pos
720 (mail-nuke-elements-outside-range !-pos (point-min) (car %-pos)))
721 (and @-pos !-pos (not %-pos)
722 (mail-nuke-elements-outside-range !-pos (point-min) @-pos))
723
724 ;; Error condition:?? (and %-pos (not @-pos))
725
726 (cond (!-pos
727 ;; **** I don't understand this save-restriction and the
728 ;; narrow-to-region inside it. Why did I do that?
729 (save-restriction
730 (cond ((and @-pos
731 mail-@-binds-tighter-than-!)
732 (goto-char @-pos)
733 (setq %-pos (cons (point) %-pos)
734 @-pos nil)
735 (delete-char 1)
736 (insert "%")
737 (setq insert-point (point-max)))
738 (mail-@-binds-tighter-than-!
739 (setq insert-point (point-max)))
740 (%-pos
741 (setq insert-point (mail-last-element %-pos)
742 saved-%-pos (mapcar 'mail-markerize %-pos)
743 %-pos nil
744 @-pos (mail-markerize @-pos)))
745 (@-pos
746 (setq insert-point @-pos)
747 (setq @-pos (mail-markerize @-pos)))
748 (t
749 (setq insert-point (point-max))))
750 (narrow-to-region (point-min) insert-point)
751 (setq saved-!-pos (car !-pos))
752 (while !-pos
753 (goto-char (point-max))
754 (cond ((and (not @-pos)
755 (not (cdr !-pos)))
756 (setq @-pos (point))
757 (insert-before-markers "@ "))
758 (t
759 (setq %-pos (cons (point) %-pos))
760 (insert-before-markers "% ")))
761 (backward-char 1)
762 (insert-buffer-substring
763 (current-buffer)
764 (if (nth 1 !-pos)
765 (1+ (nth 1 !-pos))
766 (point-min))
767 (car !-pos))
768 (delete-char 1)
769 (or (save-excursion
770 (safe-move-sexp -1)
771 (skip-chars-backward mail-whitespace)
772 (eq ?. (preceding-char)))
773 (insert-before-markers
774 (if (save-excursion
775 (skip-chars-backward mail-whitespace)
776 (eq ?. (preceding-char)))
777 ""
778 ".")
779 "uucp"))
780 (setq !-pos (cdr !-pos))))
781 (and saved-%-pos
782 (setq %-pos (append (mapcar 'mail-demarkerize saved-%-pos)
783 %-pos)))
784 (setq @-pos (mail-demarkerize @-pos))
785 (narrow-to-region (1+ saved-!-pos) (point-max))))
786 (cond ((and %-pos
787 (not @-pos))
788 (goto-char (car %-pos))
789 (delete-char 1)
790 (setq @-pos (point))
791 (insert "@")
792 (setq %-pos (cdr %-pos))))
793 (setq %-pos (nreverse %-pos))
794 ;; RFC 1034 doesn't approve of this, oh well:
795 (downcase-region (or (car %-pos) @-pos (point-max)) (point-max))
796 (cond (%-pos ; implies @-pos valid
797 (setq temp %-pos)
798 (catch 'truncated
799 (while temp
800 (goto-char (or (nth 1 temp)
801 @-pos))
802 (skip-chars-backward mail-whitespace)
803 (save-excursion
804 (safe-move-sexp -1)
805 (setq domain-pos (point))
806 (skip-chars-backward mail-whitespace)
807 (setq \.-pos (eq ?. (preceding-char))))
808 (cond ((and \.-pos
809 (get
810 (intern
811 (buffer-substring domain-pos (point)))
812 'domain-name))
813 (narrow-to-region (point-min) (point))
814 (goto-char (car temp))
815 (delete-char 1)
816 (setq @-pos (point))
817 (setcdr temp nil)
818 (setq %-pos (delq @-pos %-pos))
819 (insert "@")
820 (throw 'truncated t)))
821 (setq temp (cdr temp))))))
822 (setq mbox-beg (point-min)
823 mbox-end (if %-pos (car %-pos)
824 (or @-pos
825 (point-max))))
826
827 ;; Done canonicalizing address.
828
829 (set-buffer extraction-buffer)
830
831 ;; Find the full name
832
833 (cond ((and phrase-beg
834 (eq quote-beg phrase-beg)
835 (<= quote-end phrase-end))
836 (narrow-to-region (1+ quote-beg) (1- quote-end))
837 (undo-backslash-quoting (point-min) (point-max)))
838 (phrase-beg
839 (narrow-to-region phrase-beg phrase-end))
840 (comment-beg
841 (narrow-to-region (1+ comment-beg) (1- comment-end))
842 (undo-backslash-quoting (point-min) (point-max)))
843 (t
844 ;; *** Work in canon buffer instead? No, can't. Hmm.
845 (delete-region (point-min) (point-max))
846 (insert-buffer-substring canonicalization-buffer
847 mbox-beg mbox-end)
848 (goto-char (point-min))
849 (setq \.-ends-name (search-forward "_" nil t))
850 (goto-char (point-min))
851 (while (progn
852 (skip-chars-forward mail-whitespace)
853 (not (eobp)))
854 (setq char (char-after (point)))
855 (cond
856 ((eq char ?\")
857 (setq quote-beg (point))
858 (or (safe-move-sexp 1)
859 ;; TODO: handle this error condition!!!!!
860 (forward-char 1))
861 ;; take into account deletions
862 (setq quote-end (- (point) 2))
863 (save-excursion
864 (backward-char 1)
865 (delete-char 1)
866 (goto-char quote-beg)
867 (delete-char 1))
868 (undo-backslash-quoting quote-beg quote-end)
869 (or (eq mail-space-char (char-after (point)))
870 (insert " "))
871 (setq \.-ends-name t))
872 ((eq char ?.)
873 (if (eq (char-after (1+ (point))) ?_)
874 (progn
875 (forward-char 1)
876 (delete-char 1)
877 (insert mail-space-char))
878 (if \.-ends-name
879 (narrow-to-region (point-min) (point))
880 (delete-char 1)
881 (insert " "))))
882 ((memq (char-syntax char) '(?. ?\\))
883 (delete-char 1)
884 (insert " "))
885 (t
886 (setq atom-beg (point))
887 (forward-word 1)
888 (setq atom-end (point))
889 (save-restriction
890 (narrow-to-region atom-beg atom-end)
891 (goto-char (point-min))
892 (while (re-search-forward "\\([^_]+\\)_" nil t)
893 (replace-match "\\1 "))
894 (goto-char (point-max))))))))
895
896 (set-syntax-table address-text-syntax-table)
897
898 (setq xxx (variant-method (buffer-string)))
899 (delete-region (point-min) (point-max))
900 (insert xxx)
901 (goto-char (point-min))
902
903;; ;; Compress whitespace
904;; (goto-char (point-min))
905;; (while (re-search-forward "[ \t\n]+" nil t)
906;; (replace-match " "))
907;;
908;; ;; Fix . used as space
909;; (goto-char (point-min))
910;; (while (re-search-forward mail-bad-\.-pattern nil t)
911;; (replace-match "\\1 \\2"))
912;;
913;; ;; Delete trailing parenthesized comment
914;; (goto-char (point-max))
915;; (skip-chars-backward mail-whitespace)
916;; (cond ((memq (char-after (1- (point))) '(?\) ?\} ?\]))
917;; (setq comment-end (point))
918;; (set-syntax-table address-text-comment-syntax-table)
919;; (or (safe-move-sexp -1)
920;; (backward-char 1))
921;; (set-syntax-table address-text-syntax-table)
922;; (setq comment-beg (point))
923;; (skip-chars-backward mail-whitespace)
924;; (if (bobp)
925;; (narrow-to-region (1+ comment-beg) (1- comment-end))
926;; (narrow-to-region (point-min) (point)))))
927;;
928;; ;; Find, save, and delete any name suffix
929;; ;; *** Broken!
930;; (goto-char (point-min))
931;; (cond ((re-search-forward mail-full-name-suffix-pattern nil t)
932;; (setq name-suffix (buffer-substring (match-beginning 3)
933;; (match-end 3)))
934;; (replace-match "\\1 \\4")))
935;;
936;; ;; Delete ALL CAPS words and after, if preceded by mixed-case or
937;; ;; lowercase words. Eg. XT-DEM.
938;; (goto-char (point-min))
939;; ;; ## This will lose on something like "SMITH MAX".
940;; ;; ## maybe it should be
941;; ;; ## " \\([A-Z]+[-_/][A-Z]+\\|[A-Z][A-Z][A-Z]\\)\\b.*[^A-Z \t]"
942;; ;; ## that is, three-letter-upper-case-word with non-upper-case
943;; ;; ## characters following it.
944;; (if (re-search-forward mail-mixed-case-name-pattern nil t)
945;; (if (re-search-forward mail-weird-acronym-pattern nil t)
946;; (narrow-to-region (point-min) (match-beginning 0))))
947;;
948;; ;; Delete trailing alternative address
949;; (goto-char (point-min))
950;; (if (re-search-forward mail-alternative-address-pattern nil t)
951;; (narrow-to-region (point-min) (match-beginning 0)))
952;;
953;; ;; Delete trailing comment
954;; (goto-char (point-min))
955;; (if (re-search-forward mail-trailing-comment-start-pattern nil t)
956;; (or (progn
957;; (goto-char (match-beginning 0))
958;; (skip-chars-backward mail-whitespace)
959;; (bobp))
960;; (narrow-to-region (point-min) (match-beginning 0))))
961;;
962;; ;; Delete trailing comma-separated comment
963;; (goto-char (point-min))
964;; ;; ## doesn't this break "Smith, John"? Yes.
965;; (re-search-forward mail-last-name-first-pattern nil t)
966;; (while (search-forward "," nil t)
967;; (or (save-excursion
968;; (backward-char 2)
969;; (looking-at mail-full-name-suffix-pattern))
970;; (narrow-to-region (point-min) (1- (point)))))
971;;
972;; ;; Delete telephone numbers and ham radio call signs
973;; (goto-char (point-min))
974;; (if (re-search-forward mail-telephone-extension-pattern nil t)
975;; (narrow-to-region (point-min) (match-beginning 0)))
976;; (goto-char (point-min))
977;; (if (re-search-forward mail-ham-call-sign-pattern nil t)
978;; (if (eq (match-beginning 0) (point-min))
979;; (narrow-to-region (match-end 0) (point-max))
980;; (narrow-to-region (point-min) (match-beginning 0))))
981;;
982;; ;; Delete trailing word followed immediately by .
983;; (goto-char (point-min))
984;; ;; ## what's this for? doesn't it mess up "Public, Harry Q."? No.
985;; (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t)
986;; (narrow-to-region (point-min) (match-beginning 0)))
987;;
988;; ;; Handle & substitution
989;; ;; TODO: remember to disable middle initial guessing
990;; (goto-char (point-min))
991;; (cond ((re-search-forward "\\( \\|\\`\\)&\\( \\|\\'\\)" nil t)
992;; (goto-char (match-end 1))
993;; (delete-char 1)
994;; (capitalize-region
995;; (point)
996;; (progn
997;; (insert-buffer-substring canonicalization-buffer
998;; mbox-beg mbox-end)
999;; (point)))))
1000;;
1001;; ;; Delete nickname
1002;; (goto-char (point-min))
1003;; (if (re-search-forward mail-nickname-pattern nil t)
1004;; (replace-match (if (eq (match-beginning 2) (1- (match-end 2)))
1005;; " \\2 "
1006;; " ")))
1007;;
1008;; ;; Fixup initials
1009;; (while (progn
1010;; (goto-char (point-min))
1011;; (re-search-forward mail-bad-initials-pattern nil t))
1012;; (replace-match
1013;; (if (match-beginning 4)
1014;; "\\1. \\4"
1015;; (if (match-beginning 5)
1016;; "\\1. \\5"
1017;; "\\1. "))))
1018;;
1019;; ;; Delete title
1020;; (goto-char (point-min))
1021;; (if (re-search-forward mail-full-name-prefixes nil t)
1022;; (narrow-to-region (point) (point-max)))
1023;;
1024;; ;; Delete trailing and preceding non-name characters
1025;; (goto-char (point-min))
1026;; (skip-chars-forward mail-non-begin-name-chars)
1027;; (narrow-to-region (point) (point-max))
1028;; (goto-char (point-max))
1029;; (skip-chars-backward mail-non-end-name-chars)
1030;; (narrow-to-region (point-min) (point))
1031
1032 ;; If name is "First Last" and userid is "F?L", then assume
1033 ;; the middle initial is the second letter in the userid.
1034 ;; initially by Jamie Zawinski <jwz@lucid.com>
1035 (cond ((and (eq 3 (- mbox-end mbox-beg))
1036 (progn
1037 (goto-char (point-min))
1038 (looking-at mail-two-name-pattern)))
1039 (setq fi (char-after (match-beginning 0))
1040 li (char-after (match-beginning 3)))
1041 (save-excursion
1042 (set-buffer canonicalization-buffer)
1043 ;; char-equal is ignoring case here, so no need to upcase
1044 ;; or downcase.
1045 (let ((case-fold-search t))
1046 (and (char-equal fi (char-after mbox-beg))
1047 (char-equal li (char-after (1- mbox-end)))
1048 (setq mi (char-after (1+ mbox-beg))))))
1049 (cond ((and mi
1050 ;; TODO: use better table than syntax table
1051 (eq ?w (char-syntax mi)))
1052 (goto-char (match-beginning 3))
1053 (insert (upcase mi) ". ")))))
1054
1055;; ;; Restore suffix
1056;; (cond (name-suffix
1057;; (goto-char (point-max))
1058;; (insert ", " name-suffix)
1059;; (backward-word 1)
1060;; (cond ((memq (following-char) '(?j ?J ?s ?S))
1061;; (capitalize-word 1)
1062;; (or (eq (following-char) ?.)
1063;; (insert ?.)))
1064;; (t
1065;; (upcase-word 1)))))
1066
1067 ;; Result
1068 (list (buffer-string)
1069 (progn
1070 (set-buffer canonicalization-buffer)
1071 (buffer-string)))
1072 )))
1073
1074;; TODO: put this back in the above function now that it's proven:
1075(defun variant-method (string)
1076 (let ((variant-buffer (get-buffer-create "*variant method buffer*"))
1077 (word-count 0)
1078 mixed-case-flag lower-case-flag upper-case-flag
1079 suffix-flag last-name-comma-flag
1080 comment-beg comment-end initial beg end
1081 )
1082 (save-excursion
1083 (set-buffer variant-buffer)
1084 (buffer-flush-undo variant-buffer)
1085 (set-syntax-table address-text-syntax-table)
1086 (widen)
1087 (erase-buffer)
1088 (setq case-fold-search nil)
1089
1090 (insert string)
1091
1092 ;; Fix . used as space
1093 (goto-char (point-min))
1094 (while (re-search-forward mail-bad-\.-pattern nil t)
1095 (replace-match "\\1 \\2"))
1096
1097 ;; Skip any initial garbage.
1098 (goto-char (point-min))
1099 (skip-chars-forward mail-non-begin-name-chars)
1100 (skip-chars-backward "& \"")
1101 (narrow-to-region (point) (point-max))
1102
1103 (catch 'stop
1104 (while t
1105 (skip-chars-forward mail-whitespace)
1106
1107 (cond
1108
1109 ;; Delete title
1110 ((and (eq word-count 0)
1111 (looking-at mail-full-name-prefixes))
1112 (goto-char (match-end 0))
1113 (narrow-to-region (point) (point-max)))
1114
1115 ;; Stop after name suffix
1116 ((and (>= word-count 2)
1117 (looking-at mail-full-name-suffix-pattern))
1118 (skip-chars-backward mail-whitespace)
1119 (setq suffix-flag (point))
1120 (if (eq ?, (following-char))
1121 (forward-char 1)
1122 (insert ?,))
1123 ;; Enforce at least one space after comma
1124 (or (eq mail-space-char (following-char))
1125 (insert mail-space-char))
1126 (skip-chars-forward mail-whitespace)
1127 (cond ((memq (following-char) '(?j ?J ?s ?S))
1128 (capitalize-word 1)
1129 (if (eq (following-char) ?.)
1130 (forward-char 1)
1131 (insert ?.)))
1132 (t
1133 (upcase-word 1)))
1134 (setq word-count (1+ word-count))
1135 (throw 'stop t))
1136
1137 ;; Handle SCA names
1138 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
1139 (setq word-count 0)
1140 (goto-char (match-beginning 1))
1141 (narrow-to-region (point) (point-max)))
1142
1143 ;; Various stopping points
1144 ((or
1145 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case or
1146 ;; lowercase words. Eg. XT-DEM.
1147 (and (>= word-count 2)
1148 (or mixed-case-flag lower-case-flag)
1149 (looking-at mail-weird-acronym-pattern)
1150 (not (looking-at mail-roman-numeral-pattern)))
1151 ;; Stop before 4-or-more letter lowercase words preceded by
1152 ;; mixed case or uppercase words.
1153 (and (>= word-count 2)
1154 (or upper-case-flag mixed-case-flag)
1155 (looking-at "[a-z][a-z][a-z][a-z]+\\b"))
1156 ;; Stop before trailing alternative address
1157 (looking-at mail-alternative-address-pattern)
1158 ;; Stop before trailing comment not introduced by comma
1159 (looking-at mail-trailing-comment-start-pattern)
1160 ;; Stop before telephone numbers
1161 (looking-at mail-telephone-extension-pattern))
1162 (throw 'stop t))
1163
1164 ;; Check for initial last name followed by comma
1165 ((and (eq ?, (following-char))
1166 (eq word-count 1))
1167 (forward-char 1)
1168 (setq last-name-comma-flag t)
1169 (or (eq mail-space-char (following-char))
1170 (insert mail-space-char)))
1171
1172 ;; Stop before trailing comma-separated comment
1173 ((eq ?, (following-char))
1174 (throw 'stop t))
1175
1176 ;; Delete parenthesized/quoted comment/nickname
1177 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
1178 (setq comment-beg (point))
1179 (set-syntax-table address-text-comment-syntax-table)
1180 (cond ((memq (following-char) '(?\' ?\`))
1181 (if (eq ?\' (following-char))
1182 (forward-char 1))
1183 (or (search-forward "'" nil t)
1184 (delete-char 1)))
1185 (t
1186 (or (safe-move-sexp 1)
1187 (goto-char (point-max)))))
1188 (set-syntax-table address-text-syntax-table)
1189 (setq comment-end (point))
1190 (cond
1191 ;; Handle case of entire name being quoted
1192 ((and (eq word-count 0)
1193 (looking-at " *\\'")
1194 (>= (- comment-end comment-beg) 2))
1195 (narrow-to-region (1+ comment-beg) (1- comment-end))
1196 (goto-char (point-min)))
1197 (t
1198 ;; Handle case of quoted initial
1199 (if (and (or (= 3 (- comment-end comment-beg))
1200 (and (= 4 (- comment-end comment-beg))
1201 (eq ?. (char-after (+ 2 comment-beg)))))
1202 (not (looking-at " *\\'")))
1203 (setq initial (char-after (1+ comment-beg)))
1204 (setq initial nil))
1205 (delete-region comment-beg comment-end)
1206 (if initial
1207 (insert initial ". ")))))
1208
1209 ;; Delete ham radio call signs
1210 ((looking-at mail-ham-call-sign-pattern)
1211 (delete-region (match-beginning 0) (match-end 0)))
1212
1213 ;; Handle & substitution
1214 ;; TODO: remember to disable middle initial guessing
1215 ((and (or (bobp)
1216 (eq mail-space-char (preceding-char)))
1217 (looking-at "&\\( \\|\\'\\)"))
1218 (delete-char 1)
1219 (capitalize-region
1220 (point)
1221 (progn
1222 (insert-buffer-substring canonicalization-buffer
1223 mbox-beg mbox-end)
1224 (point))))
1225
1226 ;; Fixup initials
1227 ((looking-at mail-initial-pattern)
1228 (or (eq (following-char) (upcase (following-char)))
1229 (setq lower-case-flag t))
1230 (forward-char 1)
1231 (if (eq ?. (following-char))
1232 (forward-char 1)
1233 (insert ?.))
1234 (or (eq mail-space-char (following-char))
1235 (insert mail-space-char))
1236 (setq word-count (1+ word-count)))
1237
1238 ;; Regular name words
1239 ((looking-at mail-name-pattern)
1240 (setq beg (point))
1241 (setq end (match-end 0))
1242 (set (if (re-search-forward "[a-z]" end t)
1243 (if (progn
1244 (goto-char beg)
1245 (re-search-forward "[A-Z]" end t))
1246 'mixed-case-flag
1247 'lower-case-flag)
1248 'upper-case-flag) t)
1249 (goto-char end)
1250 (setq word-count (1+ word-count)))
1251
1252 (t
1253 (throw 'stop t)))))
1254
1255 (narrow-to-region (point-min) (point))
1256
1257 ;; Delete trailing word followed immediately by .
1258 (cond ((not suffix-flag)
1259 (goto-char (point-min))
1260 (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t)
1261 (narrow-to-region (point-min) (match-beginning 0)))))
1262
1263 ;; If last name first put it at end (but before suffix)
1264 (cond (last-name-comma-flag
1265 (goto-char (point-min))
1266 (search-forward ",")
1267 (setq end (1- (point)))
1268 (goto-char (or suffix-flag (point-max)))
1269 (or (eq mail-space-char (preceding-char))
1270 (insert mail-space-char))
1271 (insert-buffer-substring (current-buffer) (point-min) end)
1272 (narrow-to-region (1+ end) (point-max))))
1273
1274 (goto-char (point-max))
1275 (skip-chars-backward mail-non-end-name-chars)
1276 (if (eq ?. (following-char))
1277 (forward-char 1))
1278 (narrow-to-region (point)
1279 (progn
1280 (goto-char (point-min))
1281 (skip-chars-forward mail-non-begin-name-chars)
1282 (point)))
1283
1284 ;; Compress whitespace
1285 (goto-char (point-min))
1286 (while (re-search-forward "[ \t\n]+" nil t)
1287 (replace-match " "))
1288
1289 (buffer-substring (point-min) (point-max))
1290
1291 )))
1292
1293;; The country names are just in there for show right now, and because
1294;; Jamie thought it would be neat. They aren't used yet.
1295
1296;; Keep in mind that the country abbreviations follow ISO-3166. There is
1297;; a U.S. FIPS that specifies a different set of two-letter country
1298;; abbreviations.
1299
1300;; TODO: put this in its own obarray, instead of cluttering up the main
1301;; symbol table with junk.
1302
1303(mapcar
1304 (function
1305 (lambda (x)
1306 (if (symbolp x)
1307 (put x 'domain-name t)
1308 (put (car x) 'domain-name (nth 1 x)))))
1309 '((ag "Antigua")
1310 (ar "Argentina") ; Argentine Republic
1311 arpa ; Advanced Projects Research Agency
1312 (at "Austria") ; The Republic of _
1313 (au "Australia")
1314 (bb "Barbados")
1315 (be "Belgium") ; The Kingdom of _
1316 (bg "Bulgaria")
1317 bitnet ; Because It's Time NET
1318 (bo "Bolivia") ; Republic of _
1319 (br "Brazil") ; The Federative Republic of _
1320 (bs "Bahamas")
1321 (bz "Belize")
1322 (ca "Canada")
1323 (ch "Switzerland") ; The Swiss Confederation
1324 (cl "Chile") ; The Republic of _
1325 (cn "China") ; The People's Republic of _
1326 (co "Columbia")
1327 com ; Commercial
1328 (cr "Costa Rica") ; The Republic of _
1329 (cs "Czechoslovakia")
1330 (de "Germany")
1331 (dk "Denmark")
1332 (dm "Dominica")
1333 (do "Dominican Republic") ; The _
1334 (ec "Ecuador") ; The Republic of _
1335 edu ; Educational
1336 (eg "Egypt") ; The Arab Republic of _
1337 (es "Spain") ; The Kingdom of _
1338 (fi "Finland") ; The Republic of _
1339 (fj "Fiji")
1340 (fr "France")
1341 gov ; Government (U.S.A.)
1342 (gr "Greece") ; The Hellenic Republic
1343 (hk "Hong Kong")
1344 (hu "Hungary") ; The Hungarian People's Republic (???)
1345 (ie "Ireland")
1346 (il "Israel") ; The State of _
1347 (in "India") ; The Republic of _
1348 int ; something British, don't know what
1349 (is "Iceland") ; The Republic of _
1350 (it "Italy") ; The Italian Republic
1351 (jm "Jamaica")
1352 (jp "Japan")
1353 (kn "St. Kitts and Nevis")
1354 (kr "South Korea")
1355 (lc "St. Lucia")
1356 (lk "Sri Lanka") ; The Democratic Socialist Republic of _
1357 mil ; Military (U.S.A.)
1358 (mx "Mexico") ; The United Mexican States
1359 (my "Malaysia") ; changed to Myanmar????
1360 (na "Namibia")
1361 nato ; North Atlantic Treaty Organization
1362 net ; Network
1363 (ni "Nicaragua") ; The Republic of _
1364 (nl "Netherlands") ; The Kingdom of the _
1365 (no "Norway") ; The Kingdom of _
1366 (nz "New Zealand")
1367 org ; Organization
1368 (pe "Peru")
1369 (pg "Papua New Guinea")
1370 (ph "Philippines") ; The Republic of the _
1371 (pl "Poland")
1372 (pr "Puerto Rico")
1373 (pt "Portugal") ; The Portugese Republic
1374 (py "Paraguay")
1375 (se "Sweden") ; The Kingdom of _
1376 (sg "Singapore") ; The Republic of _
1377 (sr "Suriname")
1378 (su "Soviet Union")
1379 (th "Thailand") ; The Kingdom of _
1380 (tn "Tunisia")
1381 (tr "Turkey") ; The Republic of _
1382 (tt "Trinidad and Tobago")
1383 (tw "Taiwan")
1384 (uk "United Kingdom") ; The _ of Great Britain
1385 unter-dom ; something German
1386 (us "U.S.A.") ; The United States of America
1387 uucp ; Unix to Unix CoPy
1388 (uy "Uruguay") ; The Eastern Republic of _
1389 (vc "St. Vincent and the Grenadines")
1390 (ve "Venezuela") ; The Republic of _
1391 (yu "Yugoslavia") ; The Socialist Federal Republic of _
1392 ;; Also said to be Zambia ...
1393 (za "South Africa") ; The Republic of _ (why not Zaire???)
1394 (zw "Zimbabwe") ; Republic of _
1395 ))
1396;; fipnet
1397
1398\f
1399;; Code for testing.
1400
1401(defun time-extract ()
1402 (let (times list)
1403 (setq times (cons (current-time-string) times)
1404 list problem-address-alist)
1405 (while list
1406 (mail-extract-address-components (car (car list)))
1407 (setq list (cdr list)))
1408 (setq times (cons (current-time-string) times))
1409 (nreverse times)))
1410
1411(defun test-extract (&optional starting-point)
1412 (interactive)
1413 (set-buffer (get-buffer-create "*Testing*"))
1414 (erase-buffer)
1415 (sit-for 0)
1416 (mapcar 'test-extract-internal
1417 (if starting-point
1418 (memq starting-point problem-address-alist)
1419 problem-address-alist)))
1420
1421(defvar failed-item)
1422(defun test-extract-internal (item)
1423 (setq failed-item item)
1424 (let* ((address (car item))
1425 (correct-name (nth 1 item))
1426 (correct-canon (nth 2 item))
1427 (result (mail-extract-address-components address))
1428 (name (car result))
1429 (canon (nth 1 result))
1430 (name-correct (or (null correct-name)
1431 (string-equal (downcase correct-name)
1432 (downcase name))))
1433 (canon-correct (or (null correct-canon)
1434 (string-equal correct-canon canon))))
1435 (cond ((not (and name-correct canon-correct))
1436 (pop-to-buffer "*Testing*")
1437 (select-window (get-buffer-window (current-buffer)))
1438 (goto-char (point-max))
1439 (insert "Address: " address "\n")
1440 (if (not name-correct)
1441 (insert " Correct Name: [" correct-name
1442 "]\; Result: [" name "]\n"))
1443 (if (not canon-correct)
1444 (insert " Correct Canon: [" correct-canon
1445 "]\; Result: [" canon "]\n"))
1446 (insert "\n")
1447 (sit-for 0))))
1448 (setq failed-item nil))
1449
1450(defun test-continue-extract ()
1451 (interactive)
1452 (test-extract failed-item))
1453
1454\f
1455;; Assorted junk.
1456
1457;; warsaw@nlm.nih.gov (A Bad Dude -- Barry Warsaw)
1458
1459;;'(from
1460;; reply-to
1461;; return-path
1462;; x-uucp-from
1463;; sender
1464;; resent-from
1465;; resent-sender
1466;; resent-reply-to)
1467
1468;;; mail-extr.el ends here