(mail-extr-voodoo): Handle unmatched quotes in the comment-deletion loop.

[bpt/emacs.git] / lisp / mail / mail-extr.el
diff --git a/lisp/mail/mail-extr.el b/lisp/mail/mail-extr.el

index fc84c3b..b797592 100644 (file)
--- a/lisp/mail/mail-extr.el
+++ b/lisp/mail/mail-extr.el
@@ -20,8 +20,9 @@
  ;; GNU General Public License for more details.
  
  ;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING.  If not, write to
-;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
  
  ;;; Commentary:
  
@@ -279,9 +280,8 @@ by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".")
  ;; Yes, there are weird people with digits in their names.
  ;; You will also notice the consideration for the
  ;; Swedish/Finnish/Norwegian character set.
-;; #### (go to \376 instead of \377 to work around bug in search.c...)
  (defconst mail-extr-all-letters-but-separators
-  (purecopy "][A-Za-z{|}'~0-9`\200-\376"))
+  (purecopy "][A-Za-z{|}'~0-9`\200-\377"))
  
  ;; Any character that can occur in a name in an RFC822 address including
  ;; the separator (hyphen and possibly period) for multipart names.
@@ -291,11 +291,11 @@ by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".")
  
  ;; Any character that can start a name.
  ;; Keep this set as minimal as possible.
-(defconst mail-extr-first-letters (purecopy "A-Za-z"))
+(defconst mail-extr-first-letters (purecopy "A-Za-z\200-\377"))
  
  ;; Any character that can end a name.
  ;; Keep this set as minimal as possible.
-(defconst mail-extr-last-letters (purecopy "[A-Za-z`'."))
+(defconst mail-extr-last-letters (purecopy "A-Za-z\200-\377`'."))
  
  (defconst mail-extr-leading-garbage
    (purecopy (format "[^%s]+" mail-extr-first-letters)))
@@ -646,7 +646,7 @@ by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".")
    ;;  which lie outside of the range, one character at that position is
    ;;  replaced with a SPC.
    (or (memq no-replace '(t nil))
-      (error "no-replace must be t or nil, evalable at macroexpand-time."))
+      (error "no-replace must be t or nil, evaluable at macroexpand-time"))
    (` (let ((temp (, list-symbol))
            ch)
         (while temp
@@ -687,7 +687,7 @@ by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".")
    ;; Safely skip over one balanced sexp, if there is one.  Return t if success.
    (` (condition-case error
          (progn
-          (goto-char (scan-sexps (point) (, arg)))
+          (goto-char (or (scan-sexps (point) (, arg)) (point)))
            t)
         (error
         ;; #### kludge kludge kludge kludge kludge kludge kludge !!!
@@ -721,7 +721,7 @@ If ADDRESS contains more than one RFC-822 address, only the first is
         (extraction-buffer (get-buffer-create " *extract address components*"))
         char
  ;;     multiple-addresses
-       <-pos >-pos @-pos :-pos ,-pos !-pos %-pos \;-pos
+       <-pos >-pos @-pos :-pos comma-pos !-pos %-pos \;-pos
         group-:-pos group-\;-pos route-addr-:-pos
         record-pos-symbol
         first-real-pos last-real-pos
@@ -760,7 +760,9 @@ If ADDRESS contains more than one RFC-822 address, only the first is
             ((bufferp address)
              (insert-buffer-substring address))
             (t
-            (error "Illegal address: %s" address)))
+            (error "Invalid address: %s" address)))
+
+      (set-text-properties (point-min) (point-max) nil)
        
        ;; stolen from rfc822.el
        ;; Unfold multiple lines.
@@ -840,7 +842,7 @@ If ADDRESS contains more than one RFC-822 address, only the first is
          ((setq record-pos-symbol
                 (cdr (assq char
                            '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
-                            (?: . :-pos) (?, . ,-pos) (?! . !-pos)
+                            (?: . :-pos) (?, . comma-pos) (?! . !-pos)
                              (?% . %-pos) (?\; . \;-pos)))))
           (set record-pos-symbol
                (cons (point) (symbol-value record-pos-symbol)))
@@ -969,7 +971,7 @@ If ADDRESS contains more than one RFC-822 address, only the first is
         (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
         (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
         (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
-       (mail-extr-nuke-outside-range ,-pos group-:-pos group-\;-pos t)
+       (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t)
         (and last-real-pos
              (> last-real-pos (1+ group-\;-pos))
              (setq last-real-pos (1+ group-\;-pos)))
@@ -995,7 +997,7 @@ If ADDRESS contains more than one RFC-822 address, only the first is
        ;; Hell, go ahead an nuke all of the commas.
        ;; **** This will cause problems when we start handling commas in
        ;; the PHRASE part .... no it won't ... yes it will ... ?????
-      (mail-extr-nuke-outside-range ,-pos 1 1)
+      (mail-extr-nuke-outside-range comma-pos 1 1)
        
        ;; can only have multiple @s inside < >.  The fact that some MTAs
        ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
@@ -1165,8 +1167,6 @@ If ADDRESS contains more than one RFC-822 address, only the first is
              (setq %-pos (cdr %-pos))))
  
        (setq %-pos (nreverse %-pos))
-      ;; RFC 1034 doesn't approve of this, oh well:
-      (downcase-region (or (car %-pos) @-pos (point-max)) (point-max))
        (cond (%-pos                     ; implies @-pos valid
              (setq temp %-pos)
              (catch 'truncated
@@ -1274,7 +1274,8 @@ If ADDRESS contains more than one RFC-822 address, only the first is
                    (backward-char 1)
                    (mail-extr-delete-char 1)
                    (goto-char quote-beg)
-                  (mail-extr-delete-char 1))
+                  (or (eobp)
+                      (mail-extr-delete-char 1)))
                  (mail-extr-undo-backslash-quoting quote-beg quote-end)
                  (or (eq ?\  (char-after (point)))
                      (insert " "))
@@ -1446,6 +1447,33 @@ If ADDRESS contains more than one RFC-822 address, only the first is
         )
      (save-excursion
        (set-syntax-table mail-extr-address-text-syntax-table)
+
+      ;; Get rid of comments.
+      (goto-char (point-min))
+      (while (not (eobp))
+       ;; Initialize for this iteration of the loop.
+       (skip-chars-forward "^({[\"'`")
+       (let ((cbeg (point)))
+         (set-syntax-table mail-extr-address-text-comment-syntax-table)
+         (cond ((memq (following-char) '(?\' ?\`))
+                (search-forward "'" nil 'move
+                                (if (eq ?\' (following-char)) 2 1)))
+               (t
+                (or (mail-extr-safe-move-sexp 1)
+                    (goto-char (point-max)))))
+         (set-syntax-table mail-extr-address-text-syntax-table)
+         (when (eq (char-after cbeg) ?\()
+           ;; Delete the comment itself.
+           (delete-region cbeg (point))
+           ;; Canonicalize whitespace where the comment was.
+           (skip-chars-backward " \t")
+           (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
+               (replace-match "")
+             (setq cbeg (point))
+             (skip-chars-forward " \t")
+             (if (bobp)
+                 (delete-region (point) cbeg)
+               (just-one-space))))))
        
        ;; This was moved above.
        ;; Fix . used as space
@@ -1702,7 +1730,7 @@ If ADDRESS contains more than one RFC-822 address, only the first is
        
        ;; If the last thing in the name is 2 or more periods, or one or more
        ;; other sentence terminators (but not a single period) then keep them
-      ;; and the preceeding word.  This is for the benefit of whole sentences
+      ;; and the preceding word.  This is for the benefit of whole sentences
        ;; in the name field: it's better behavior than dropping the last word
        ;; of the sentence...
        (if (and (not suffix-flag)
@@ -1741,7 +1769,7 @@ If ADDRESS contains more than one RFC-822 address, only the first is
              (narrow-to-region (point) (point-max))))
        
        ;; Delete leading and trailing junk characters.
-      ;; *** This is probably completly unneeded now.
+      ;; *** This is probably completely unneeded now.
        ;;(goto-char (point-max))
        ;;(skip-chars-backward mail-extr-non-end-name-chars)
        ;;(if (eq ?. (following-char))
@@ -1779,106 +1807,184 @@ If ADDRESS contains more than one RFC-822 address, only the first is
              (if (nth 2 x)
                  (format (nth 2 x) (nth 1 x))
                (nth 1 x)))))
-     '(("ag" "Antigua")
+     '(
+       ;; ISO 3166 codes:
+       ("ae" "United Arab Emirates")
+       ("ag" "Antigua and Barbuda")
+       ("al" "Albania")
+       ("ao" "Angola")
+       ("aq" "Antarctica")             ; continent
         ("ar" "Argentina"       "Argentine Republic")
-       ("arpa" t               "Advanced Projects Research Agency")
         ("at" "Austria"         "The Republic of %s")
         ("au" "Australia")
+       ("az" "Azerbaijan")
         ("bb" "Barbados")
+       ("bd" "Bangladesh")
         ("be" "Belgium"         "The Kingdom of %s")
+       ("bf" "Burkina Faso")
         ("bg" "Bulgaria")
-       ("bitnet" t             "Because It's Time NET")
+       ("bh" "Bahrain")
+       ("bm" "Bermuda")
         ("bo" "Bolivia"         "Republic of %s")
         ("br" "Brazil"          "The Federative Republic of %s")
         ("bs" "Bahamas")
+       ("bw" "Botswana")
+       ("by" "Belarus")
         ("bz" "Belize")
         ("ca" "Canada")
+       ("cg" "Congo")
         ("ch" "Switzerland"     "The Swiss Confederation")
+       ("ci" "Ivory Coast")
         ("cl" "Chile"           "The Republic of %s")
+       ("cm" "Cameroon")               ; In .fr domain
         ("cn" "China"           "The People's Republic of %s")
-       ("co" "Columbia")
-       ("com" t                        "Commercial organizations (U.S.A.)")
+       ("co" "Colombia")
         ("cr" "Costa Rica"      "The Republic of %s")
         ("cs" "Czechoslovakia")
+       ("cu" "Cuba")
+       ("cy" "Cyprus")
+       ("cz" "Czech Republic")
         ("de" "Germany")
         ("dk" "Denmark")
         ("dm" "Dominica")
         ("do" "Dominican Republic"      "The %s")
+       ("dz" "Algeria")
         ("ec" "Ecuador"         "The Republic of %s")
-       ("edu" t                        "Educational institutions (U.S.A.)")
+       ("ee" "Estonia")
         ("eg" "Egypt"           "The Arab Republic of %s")
+       ("er" "Eritrea")
         ("es" "Spain"           "The Kingdom of %s")
         ("fi" "Finland"         "The Republic of %s")
         ("fj" "Fiji")
+       ("fo" "Faroe Islands")
         ("fr" "France")
-       ("gov" t                        "Government (U.S.A.)")
+       ("gb" "Great Britain")
+       ("gd" "Grenada")
+       ("ge" "Georgia")
+       ("gf" "Guyana (Fr.)")
+       ("gp" "Guadeloupe (Fr.)")
         ("gr" "Greece"          "The Hellenic Republic (%s)")
+       ("gt" "Guatemala")
+       ("gu" "Guam (U.S.)")
         ("hk" "Hong Kong")
-       ("hu" "Hungary"         "The Hungarian People's Republic")      ;???
+       ("hn" "Honduras")
+       ("hr" "Croatia")
+       ("ht" "Haiti")
+       ("hu" "Hungary"         "The Hungarian Republic")       ;???
+       ("id" "Indonesia")
         ("ie" "Ireland")
         ("il" "Israel"          "The State of %s")
         ("in" "India"           "The Republic of %s")
-       ("int" t                        "(something British, don't know what)")
+       ("ir" "Iran")
         ("is" "Iceland"         "The Republic of %s")
         ("it" "Italy"           "The Italian Republic")
         ("jm" "Jamaica")
         ("jp" "Japan")
-       ("kn" "St. Kitts and Nevis")
-       ("kr" "South Korea")
+       ("ke" "Kenya")
+       ("kn" "St. Kitts, Nevis, and Anguilla")
+       ("kp" "Korea (North)")
+       ("kr" "Korea (South)")
+       ("kw" "Kuwait")
+       ("kz" "Kazakhstan")
+       ("lb" "Lebanon")
         ("lc" "St. Lucia")
+       ("li" "Liechtenstein")
         ("lk" "Sri Lanka"       "The Democratic Socialist Republic of %s")
-       ("mil" t                        "Military (U.S.A.)")
+       ("ls" "Lesotho")
+       ("lt" "Lithuania")
+       ("lu" "Luxembourg")
+       ("lv" "Latvia")
+       ("ma" "Morocco")
+       ("md" "Moldova")
+       ("mg" "Madagascar")
+       ("mk" "Macedonia")
+       ("ml" "Mali")
+       ("mo" "Macau")
+       ("mt" "Malta")
+       ("mu" "Mauritius")
+       ("mw" "Malawi")
         ("mx" "Mexico"          "The United Mexican States")
         ("my" "Malaysia"                "%s (changed to Myanmar?)")             ;???
+       ("mz" "Mozambique")
         ("na" "Namibia")
-       ("nato" t               "North Atlantic Treaty Organization")
-       ("net" t                        "Network")
+       ("nc" "New Caledonia (Fr.)")
+       ("ne" "Niger")                  ; In .fr domain
         ("ni" "Nicaragua"       "The Republic of %s")
         ("nl" "Netherlands"     "The Kingdom of the %s")
         ("no" "Norway"          "The Kingdom of %s")
+       ("np" "Nepal")                  ; Via .in domain
         ("nz" "New Zealand")
-       ("org" t                        "Non-commercial organizations (U.S.A.)")
+       ("pa" "Panama")
         ("pe" "Peru")
+       ("pf" "Polynesia (Fr.)")
         ("pg" "Papua New Guinea")
         ("ph" "Philippines"     "The Republic of the %s")
+       ("pk" "Pakistan")
         ("pl" "Poland")
-       ("pr" "Puerto Rico")
-       ("pt" "Portugal"                "The Portugese Republic")
+       ("pr" "Puerto Rico (U.S.)")
+       ("pt" "Portugal"                "The Portuguese Republic")
         ("py" "Paraguay")
+       ("re" "Reunion (Fr.)")          ; In .fr domain
+       ("ro" "Romania")
+       ("ru" "Russian Federation")
+       ("sa" "Saudi Arabia")
+       ("sc" "Seychelles")
+       ("sd" "Sudan")
         ("se" "Sweden"          "The Kingdom of %s")
         ("sg" "Singapore"       "The Republic of %s")
+       ("si" "Slovenia")
+       ("sj" "Svalbard and Jan Mayen Is.") ; In .no domain
+       ("sk" "Slovakia"                "The Slovak Republic")
+       ("sn" "Senegal")
         ("sr" "Suriname")
         ("su" "Soviet Union")
+       ("sz" "Swaziland")
+       ("tg" "Togo")
         ("th" "Thailand"                "The Kingdom of %s")
+       ("tm" "Turkmenistan")           ; In .su domain
         ("tn" "Tunisia")
         ("tr" "Turkey"          "The Republic of %s")
         ("tt" "Trinidad and Tobago")
         ("tw" "Taiwan")
+       ("ua" "Ukraine")
         ("uk" "United Kingdom"  "The %s of Great Britain and Northern Ireland")
-       ("unter-dom" t          "(something German)")
-       ("us" "U.S.A."          "The United States of America")
-       ("uucp" t               "Unix to Unix CoPy")
+       ("us" "United States"   "The %s of America")
         ("uy" "Uruguay"         "The Eastern Republic of %s")
         ("vc" "St. Vincent and the Grenadines")
         ("ve" "Venezuela"       "The Republic of %s")
+       ("vi" "Virgin Islands (U.S.)")
+       ("vn" "Vietnam")
+       ("vu" "Vanuatu")
         ("yu" "Yugoslavia"      "The Socialist Federal Republic of %s")
-       ;; Also said to be Zambia ... (why not Zaire???)
         ("za" "South Africa"    "The Republic of %s (or Zambia? Zaire?)")
         ("zw" "Zimbabwe"                "Republic of %s")
-;; fipnet
+       ;; Special top-level domains:
+       ("arpa" t               "Advanced Research Projects Agency (U.S. DoD)")
+       ("bitnet" t             "Because It's Time NET")
+       ("com" t                        "Commercial")
+       ("edu" t                        "Educational")
+       ("gov" t                        "Government (U.S.)")
+       ("int" t                        "International (NATO)")
+       ("mil" t                        "Military (U.S.)")
+       ("nato" t               "North Atlantic Treaty Organization")
+       ("net" t                        "Network")
+       ("org" t                        "Non-profit Organization")
+       ;;("unter-dom" t                "? (Ger.)")
+       ("uucp" t               "Unix to Unix CoPy")
+       ;;("fipnet" nil         "?")
         ))
      ob))
  
  ;;;###autoload
  (defun what-domain (domain)
-  "Convert mail domain to country tit corresponds to."
+  "Convert mail domain DOMAIN to the country it corresponds to."
    (interactive
     (let ((completion-ignore-case t))
       (list (completing-read "Domain: "
                             mail-extr-all-top-level-domains nil t))))
    (or (setq domain (intern-soft (downcase domain)
                                 mail-extr-all-top-level-domains))
-      (error "no such domain"))
+      (error "No such domain"))
    (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name)))
  
  \f