Commit | Line | Data |
---|---|---|
c113de23 | 1 | ;;; rfc1843.el --- HZ (rfc1843) decoding |
e84b4b86 | 2 | |
b6c2d8c6 | 3 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
e3fe4da0 | 4 | ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
7 | ;; Keywords: news HZ HZ+ mail i18n | |
8 | ||
715a2ca2 | 9 | ;; This file is part of GNU Emacs. |
c113de23 GM |
10 | |
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published | |
5a9dffec | 13 | ;; by the Free Software Foundation; either version 3, or (at your |
c113de23 GM |
14 | ;; option) any later version. |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, but | |
17 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | ;; General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
23 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
24 | ;; Boston, MA 02110-1301, USA. | |
c113de23 GM |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;; Usage: | |
29 | ;; (require 'rfc1843) | |
30 | ;; (rfc1843-gnus-setup) | |
31 | ;; | |
32 | ;; Test: | |
33 | ;; (rfc1843-decode-string "~{<:Ky2;S{#,NpJ)l6HK!#~}") | |
34 | ||
35 | ;;; Code: | |
36 | ||
444f6b28 GM |
;; Compatibility shim for Emacs < 22.2, which lacks `declare-function'.
;; When the macro is missing, define it as a no-op accepting any
;; arguments, so that `declare-function' forms in this file expand
;; to nil instead of signalling an error.
(eval-and-compile
  (if (fboundp 'declare-function)
      nil
    (defmacro declare-function (&rest r))))
40 | ||
1391c567 | 41 | (eval-when-compile (require 'cl)) |
c113de23 GM |
42 | (require 'mm-util) |
43 | ||
2df35c02 JB |
44 | (defvar gnus-decode-encoded-word-function) |
45 | (defvar gnus-decode-header-function) | |
46 | (defvar gnus-newsgroup-name) | |
47 | ||
c113de23 GM |
(defvar rfc1843-word-regexp
  (concat "~\\({"                               ; group 1 starts at the "{"
          "\\([\041-\167][\041-\176]\\| \\)+"   ; two-byte pairs or spaces
          "\\)"
          "\\(~}\\|$\\)")                       ; "~}" or end of line
  "Regexp matching one HZ-encoded word, tolerating embedded spaces.
Group 1 holds the opening \"{\" followed by the encoded text.  The
word is terminated by \"~}\" or by the end of the line.")
50 | ||
(defvar rfc1843-word-regexp-strictly
  (concat "~\\({"                          ; group 1 starts at the "{"
          "\\([\041-\167][\041-\176]\\)+"  ; two-byte pairs only, no spaces
          "\\)"
          "\\(~}\\|$\\)")                  ; "~}" or end of line
  "Regexp matching one strictly HZ-encoded word.
Same shape as `rfc1843-word-regexp', except that spaces are not
accepted inside the encoded text.")
53 | ||
(defvar rfc1843-hzp-word-regexp
  (concat "~\\("
          "{\\([\041-\167][\041-\176]\\| \\)+"     ; word opened by "~{"
          "\\|"
          "[<>]\\([\041-\175][\041-\176]\\| \\)+"  ; word opened by "~<" or "~>"
          "\\)"
          "\\(~}\\|$\\)")                          ; "~}" or end of line
  "Regexp matching one HZ or HZ+ encoded word, tolerating embedded spaces.
Group 1 holds the opening character (\"{\", \"<\" or \">\") followed by
the encoded text.  The word is terminated by \"~}\" or by the end of
the line.")
c113de23 GM |
57 | |
(defvar rfc1843-hzp-word-regexp-strictly
  (concat "~\\("
          "{\\([\041-\167][\041-\176]\\)+"     ; word opened by "~{"
          "\\|"
          "[<>]\\([\041-\175][\041-\176]\\)+"  ; word opened by "~<" or "~>"
          "\\)"
          "\\(~}\\|$\\)")                      ; "~}" or end of line
  "Regexp matching one strictly encoded HZ or HZ+ word.
Same shape as `rfc1843-hzp-word-regexp', except that spaces are not
accepted inside the encoded text.")
c113de23 GM |
61 | |
(defcustom rfc1843-decode-loosely nil
  "Non-nil means decode HZ text without a preliminary strict check.
When nil, a buffer or string is decoded only if it contains at
least one strictly HZ-encoded word, that is, one with no spaces
inside the encoded text.  When non-nil, that check is skipped and
every word matching the looser regexps is decoded."
  :type 'boolean
  :group 'mime)
c113de23 GM |
68 | |
(defcustom rfc1843-decode-hzp t
  "Non-nil means decode HZ+ text in addition to plain HZ.
The HZ+ specification (also known as HZP) provides a standardized
7-bit representation of mixed Big5, GB, and ASCII text for
convenient e-mail transmission, news posting, etc.
The document of the HZ+ 0.78 specification can be found at
ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc"
  :group 'mime
  :type 'boolean)
c113de23 GM |
78 | |
(defcustom rfc1843-newsgroups-regexp "chinese\\|hz"
  "Regexp matching names of newsgroups that might contain HZ-encoded text.
Before use it is wrapped as \"\\\\=\\<\\\\=\\(REGEXP\\\\=\\)\\\\=\\>\", so it
only has to match a whole word inside the group name."
  ;; `regexp' rather than `string', so Customize validates the value.
  :type 'regexp
  :group 'mime)
c113de23 GM |
83 | |
(defun rfc1843-decode-region (from to)
  "Decode the HZ-encoded text between FROM and TO in place.
Unless `rfc1843-decode-loosely' is non-nil, nothing is changed when
the region contains no strictly encoded word.  HZ+ words are
recognized as well when `rfc1843-decode-hzp' is non-nil.  Once the
encoded words have been replaced, \"~~\" is reduced to \"~\" and a
\"~\" followed by a newline is removed together with that newline."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (when (or rfc1843-decode-loosely
              (re-search-forward (if rfc1843-decode-hzp
                                     rfc1843-hzp-word-regexp-strictly
                                   rfc1843-word-regexp-strictly)
                                 to t))
      (save-restriction
        (narrow-to-region from to)
        ;; Pass 1: replace each encoded word by its decoded text.
        (goto-char (point-min))
        (while (re-search-forward (if rfc1843-decode-hzp
                                      rfc1843-hzp-word-regexp
                                    rfc1843-word-regexp)
                                  (point-max) t)
          ;; Text with extents may cause XEmacs crash, hence the
          ;; property-free copy.
          (let* ((payload (buffer-substring-no-properties
                           (match-beginning 1)
                           (match-end 1)))
                 ;; The opening character selects the coding system.
                 (lead (aref payload 0)))
            ;; Remove the whole match; point ends up where it began,
            ;; which is where the decoded text is inserted.
            (delete-region (match-beginning 0) (match-end 0))
            (insert (mm-decode-coding-string
                     (rfc1843-decode (substring payload 1) lead)
                     (if (eq lead ?{) 'cn-gb-2312 'cn-big5)))))
        ;; Pass 2: handle the remaining tilde escapes.
        (goto-char (point-min))
        (while (search-forward "~" (point-max) t)
          (let ((next (char-after)))
            (cond ((eq next ?\n)
                   ;; "~" + newline: drop both characters.
                   (delete-char -1)
                   (delete-char 1))
                  ((eq next ?~)
                   ;; "~~": keep a single tilde.
                   (delete-char 1)))))))))
119 | ||
(defun rfc1843-decode-string (string)
  "Return STRING with its HZ-encoded parts decoded.
The work is done by `rfc1843-decode-region' on a temporary buffer
holding STRING; the argument itself is not modified."
  ;; Sample the multibyte state before switching buffers, so the
  ;; temporary buffer can be given the same state.
  (let ((multibyte (mm-multibyte-p)))
    (with-temp-buffer
      (if multibyte
          (mm-enable-multibyte))
      (insert string)
      (inline
        (rfc1843-decode-region (point-min) (point-max)))
      (buffer-string))))
130 | ||
(defun rfc1843-decode (word &optional firstc)
  "Decode HZ WORD and return it as a new string.
WORD is the encoded text without its opening character, which is
passed separately as FIRSTC.  When FIRSTC is nil or ?{, 128 is
added to every character other than space.  Otherwise every
two-character pair is remapped arithmetically into two new
character codes, with ?< selecting a lead offset of 201 and any
other FIRSTC an offset of 161; spaces are left untouched.  WORD
itself is not modified."
  (let ((out (substring word 0))        ; work on a copy
        (pos 0)
        ch)
    (if (or (not firstc) (eq firstc ?{))
        ;; One character at a time: set the high bit.
        (while (< pos (length out))
          (setq ch (aref out pos))
          (unless (eq ch ? )
            (aset out pos (+ 128 ch)))
          (setq pos (1+ pos)))
      ;; Two characters at a time, skipping single spaces.
      (while (< pos (length out))
        (setq ch (aref out pos))
        (if (eq ch ? )
            (setq pos (1+ pos))
          (let* ((code (+ (* 94 ch) (aref out (1+ pos)) -3135))
                 (trail (% code 157)))
            (aset out pos (+ (/ code 157) (if (eq firstc ?<) 201 161)))
            (aset out (1+ pos) (+ trail (if (< trail 63) 64 98)))
            (setq pos (+ pos 2))))))
    out))
145 | ||
444f6b28 GM |
146 | (autoload 'mail-header-parse-content-type "mail-parse") |
147 | (autoload 'message-narrow-to-head "message") | |
148 | (declare-function message-fetch-field "message" (header &optional not-all)) | |
149 | ||
c113de23 GM |
(defun rfc1843-decode-article-body ()
  "Decode HZ encoded text in the body of the current article.
Nothing is done unless `gnus-newsgroup-name' matches
`rfc1843-newsgroups-regexp' as a whole word.  The body is decoded
when the article has no usable Content-Type header, or when that
header says \"text/plain\"."
  (when (string-match (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>")
                      (or gnus-newsgroup-name ""))
    (save-excursion
      (save-restriction
        (message-narrow-to-head)
        (let* ((inhibit-point-motion-hooks t)
               (case-fold-search t)
               (ct (message-fetch-field "Content-Type" t))
               (ctl (and ct (mail-header-parse-content-type ct))))
          ;; A parsed type without a "/" is treated like a missing
          ;; header.
          (unless (or (null ctl)
                      (string-match "/" (car ctl)))
            (setq ctl nil))
          ;; Move from the end of the narrowed head to the line after
          ;; it, and restrict the buffer to the text from there on.
          (goto-char (point-max))
          (widen)
          (forward-line 1)
          (narrow-to-region (point) (point-max))
          (when (or (not ctl)
                    (equal (car ctl) "text/plain"))
            (rfc1843-decode-region (point) (point-max))))))))
170 | ||
(defvar rfc1843-old-gnus-decode-header-function
  nil
  "Initially nil.
NOTE(review): nothing in the visible part of this file reads or sets
this variable; presumably it was meant to save the previous value of
`gnus-decode-header-function' -- confirm before relying on it.")
172 | (defvar gnus-decode-header-methods) | |
173 | (defvar gnus-decode-encoded-word-methods) | |
174 | ||
(defun rfc1843-gnus-setup ()
  "Setup HZ decoding for Gnus.
Appends `rfc1843-decode-article-body' to `gnus-article-decode-hook',
makes Gnus use its multi-method decoders for encoded words and
headers, and registers `rfc1843-decode-string' and
`rfc1843-decode-region' as decoding methods for groups whose name
matches `rfc1843-newsgroups-regexp' as a whole word.
Calling this function more than once is harmless: entries that are
already present are not added again.
Returns the resulting value of `gnus-decode-header-methods'."
  (require 'gnus-art)
  (require 'gnus-sum)
  (add-hook 'gnus-article-decode-hook 'rfc1843-decode-article-body t)
  (setq gnus-decode-encoded-word-function
        'gnus-multi-decode-encoded-word-string
        gnus-decode-header-function
        'gnus-multi-decode-header)
  (let ((group-regexp (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>")))
    ;; `add-to-list' with APPEND keeps the entries at the end of the
    ;; lists, skips entries that are already present (compared with
    ;; `equal'), and builds a new list rather than destructively
    ;; modifying the existing one.
    (add-to-list 'gnus-decode-encoded-word-methods
                 (cons group-regexp 'rfc1843-decode-string)
                 t)
    (add-to-list 'gnus-decode-header-methods
                 (cons group-regexp 'rfc1843-decode-region)
                 t)))
194 | ||
195 | (provide 'rfc1843) | |
196 | ||
ab5796a9 | 197 | ;;; arch-tag: 5149c301-a6ca-4731-9c9d-ba616e2cb687 |
c113de23 | 198 | ;;; rfc1843.el ends here |