| 1 | ;;; rfc1843.el --- HZ (rfc1843) decoding |
| 2 | |
| 3 | ;; Copyright (C) 1998-2012 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> |
| 6 | ;; Keywords: news HZ HZ+ mail i18n |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation, either version 3 of the License, or |
| 13 | ;; (at your option) any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 22 | |
| 23 | ;;; Commentary: |
| 24 | |
| 25 | ;; Usage: |
| 26 | ;; (require 'rfc1843) |
| 27 | ;; (rfc1843-gnus-setup) |
| 28 | ;; |
| 29 | ;; Test: |
| 30 | ;; (rfc1843-decode-string "~{<:Ky2;S{#,NpJ)l6HK!#~}") |
| 31 | |
| 32 | ;;; Code: |
| 33 | |
;; Compatibility shim for Emacs <22.2 and XEmacs: when `declare-function'
;; is not already defined, provide a stand-in macro that accepts any
;; arguments and expands to nil, so the `declare-function' form further
;; down in this file is harmless there.
(eval-and-compile
  (if (fboundp 'declare-function)
      nil
    (defmacro declare-function (&rest _args)
      "Stand-in for `declare-function'; ignore _ARGS and expand to nil."
      nil)))
| 37 | |
| 38 | (eval-when-compile (require 'cl)) |
| 39 | (require 'mm-util) |
| 40 | |
;; Forward declarations of Gnus variables referenced in this file.
;; A value-less `defvar' only marks the symbol as a special variable for
;; the byte compiler; it does not bind or change the variable.
;; `gnus-decode-encoded-word-function' and `gnus-decode-header-function'
;; are set by `rfc1843-gnus-setup'; `gnus-newsgroup-name' is read by
;; `rfc1843-decode-article-body'.
(defvar gnus-decode-encoded-word-function)
(defvar gnus-decode-header-function)
(defvar gnus-newsgroup-name)
| 44 | |
(defvar rfc1843-word-regexp
  (concat "~\\("                                  ; group 1: `{' plus payload
          "{\\([\041-\167][\041-\176]\\| \\)+"    ; byte pairs or single spaces
          "\\)"
          "\\(~}\\|$\\)")                         ; closing `~}' or end of line
  "Regexp matching an HZ word, tolerating spaces between byte pairs.
Group 1 spans the opening `{' and the encoded payload that follows it.")
| 47 | |
(defvar rfc1843-word-regexp-strictly
  (concat "~\\("                              ; group 1: `{' plus payload
          "{\\([\041-\167][\041-\176]\\)+"    ; byte pairs only, no spaces
          "\\)"
          "\\(~}\\|$\\)")                     ; closing `~}' or end of line
  "Regexp matching an HZ word made only of byte pairs (no spaces).
Group 1 spans the opening `{' and the encoded payload that follows it.")
| 50 | |
(defvar rfc1843-hzp-word-regexp
  (concat "~\\("                                     ; group 1: opener plus payload
          "{\\([\041-\167][\041-\176]\\| \\)+"       ; `{' word: pairs or spaces
          "\\|"
          "[<>]\\([\041-\175][\041-\176]\\| \\)+"    ; `<' or `>' word: pairs or spaces
          "\\)"
          "\\(~}\\|$\\)")                            ; closing `~}' or end of line
  "Regexp matching an HZ or HZ+ word, tolerating spaces between byte pairs.
Group 1 spans the opening character (`{', `<' or `>') and the encoded
payload that follows it.")
| 54 | |
(defvar rfc1843-hzp-word-regexp-strictly
  (concat "~\\("                                 ; group 1: opener plus payload
          "{\\([\041-\167][\041-\176]\\)+"       ; `{' word: byte pairs only
          "\\|"
          "[<>]\\([\041-\175][\041-\176]\\)+"    ; `<' or `>' word: byte pairs only
          "\\)"
          "\\(~}\\|$\\)")                        ; closing `~}' or end of line
  "Regexp matching an HZ or HZ+ word made only of byte pairs (no spaces).
Group 1 spans the opening character (`{', `<' or `>') and the encoded
payload that follows it.")
| 58 | |
;; The previous docstring had the condition inverted: the strict
;; pre-check in `rfc1843-decode-region' is performed only when this
;; option is nil.
(defcustom rfc1843-decode-loosely nil
  "Loosely check HZ encoding if non-nil.
When nil, only buffers or strings that contain strictly HZ-encoded
text are decoded.  When non-nil, that strict check is skipped and
decoding is always attempted."
  :type 'boolean
  :group 'mime)
| 65 | |
(defcustom rfc1843-decode-hzp t
  "Non-nil means also decode HZ+ (HZP) encoded text.
HZ+ is a specification for a standardized 7-bit representation of
mixed Big5, GB, and ASCII text, intended for convenient e-mail
transmission, news posting, etc.
The HZ+ 0.78 specification document can be found at
ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc"
  :group 'mime
  :type 'boolean)
| 75 | |
;; Declared with the `regexp' customization type (rather than plain
;; `string') so Customize treats the value as a regular expression.
;; Existing string values remain valid.
(defcustom rfc1843-newsgroups-regexp "chinese\\|hz"
  "Regexp matching newsgroups whose contents might be HZ encoded.
The regexp is wrapped in word-boundary anchors before it is matched
against the newsgroup name."
  :type 'regexp
  :group 'mime)
| 80 | |
(defun rfc1843-decode-region (from to)
  "Decode HZ in the region between FROM and TO.
Unless `rfc1843-decode-loosely' is non-nil, nothing is changed when the
region contains no strictly encoded word.  Otherwise every encoded word
is replaced by its decoded text, and afterwards each \"~~\" is reduced
to \"~\" and each \"~\" followed by a newline is removed together with
that newline.  `rfc1843-decode-hzp' selects whether the HZ+ regexps or
the plain HZ regexps are used."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (when (or rfc1843-decode-loosely
              (re-search-forward (if rfc1843-decode-hzp
                                     rfc1843-hzp-word-regexp-strictly
                                   rfc1843-word-regexp-strictly)
                                 to t))
      (save-restriction
        (narrow-to-region from to)
        ;; Pass 1: replace each encoded word with its decoded text.
        (goto-char (point-min))
        (let (word lead)
          (while (re-search-forward (if rfc1843-decode-hzp
                                        rfc1843-hzp-word-regexp
                                      rfc1843-word-regexp)
                                    (point-max) t)
            ;; Text with extents may cause XEmacs crash, hence the
            ;; property-free copy.
            (setq word (buffer-substring-no-properties (match-beginning 1)
                                                       (match-end 1))
                  lead (aref word 0))
            ;; Remove the whole match (escape sequences included) while
            ;; the match data is still valid; point ends up where the
            ;; word started, which is where the decoded text goes.
            (delete-region (match-beginning 0) (match-end 0))
            (insert (mm-decode-coding-string
                     (rfc1843-decode (substring word 1) lead)
                     (if (eq lead ?{) 'cn-gb-2312 'cn-big5)))))
        ;; Pass 2: resolve the remaining tilde escapes.
        (goto-char (point-min))
        (while (search-forward "~" (point-max) t)
          (let ((next (char-after)))
            (cond ((eq next ?\n)
                   ;; "~" before a newline: drop the tilde and the newline.
                   (delete-char -1)
                   (delete-char 1))
                  ((eq next ?~)
                   ;; "~~": keep one tilde.
                   (delete-char 1)))))))))
| 116 | |
(defun rfc1843-decode-string (string)
  "Decode HZ STRING and return the results.
The decoding is done by `rfc1843-decode-region' on a temporary buffer
holding STRING."
  ;; Sample the multibyte state before `with-temp-buffer' switches
  ;; buffers, so that it reflects the caller's buffer.
  (let ((multibyte (mm-multibyte-p)))
    (with-temp-buffer
      (if multibyte
          (mm-enable-multibyte))
      (insert string)
      (inline
        (rfc1843-decode-region (point-min) (point-max)))
      (buffer-string))))
| 127 | |
(defun rfc1843-decode (word &optional firstc)
  "Decode HZ WORD and return it.
WORD itself is left untouched; a modified copy is returned.  Space
characters are always kept as they are.

If FIRSTC is nil or `{', 128 is added to every other character.

Otherwise the characters are consumed in pairs C1 C2.  With
V = 94*C1 + C2 - 3135, the pair is replaced by V/157 plus 201 (when
FIRSTC is `<') or 161 (any other FIRSTC), followed by V%157 plus 64
when that remainder is below 63 and plus 98 otherwise."
  (let* ((out (substring word 0))       ; work on a copy of WORD
         (len (length out))
         (pos 0)
         ch)
    (if (or (null firstc) (eq firstc ?{))
        ;; One character at a time.
        (while (< pos len)
          (setq ch (aref out pos))
          (unless (eq ch ? )
            (aset out pos (+ 128 ch)))
          (setq pos (1+ pos)))
      ;; Two characters at a time, except for lone spaces.
      (let ((lead-base (if (eq firstc ?<) 201 161))
            code trail)
        (while (< pos len)
          (setq ch (aref out pos))
          (if (eq ch ? )
              (setq pos (1+ pos))
            (setq code (+ (* 94 ch) (aref out (1+ pos)) -3135))
            (aset out pos (+ (/ code 157) lead-base))
            (setq trail (% code 157))
            (aset out (1+ pos) (+ trail (if (< trail 63) 64 98)))
            (setq pos (+ pos 2))))))
    out))
| 142 | |
;; Functions called by `rfc1843-decode-article-body'.  The first two are
;; registered as autoloads from the named libraries, so those libraries
;; are only loaded when the functions are first called.
(autoload 'mail-header-parse-content-type "mail-parse")
(autoload 'message-narrow-to-head "message")
;; Declaration only: states that `message-fetch-field' lives in the
;; "message" library and takes (HEADER &optional NOT-ALL).
(declare-function message-fetch-field "message" (header &optional not-all))
| 146 | |
(defun rfc1843-decode-article-body ()
  "Decode HZ encoded text in the article body.
Nothing is done unless `gnus-newsgroup-name' matches
`rfc1843-newsgroups-regexp' as a whole word.  The body is decoded only
when the article has no usable Content-Type header or when its content
type is \"text/plain\"."
  (when (string-match (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>")
                      (or gnus-newsgroup-name ""))
    (save-excursion
      (save-restriction
        (message-narrow-to-head)
        (let* ((inhibit-point-motion-hooks t)
               (case-fold-search t)
               (content-type (message-fetch-field "Content-Type" t))
               ;; A parsed value whose type lacks a "/" is treated the
               ;; same as a missing header.
               (parsed (let ((raw (and content-type
                                       (mail-header-parse-content-type
                                        content-type))))
                         (and raw
                              (string-match "/" (car raw))
                              raw))))
          ;; Step from the end of the header to the line after it and
          ;; restrict the buffer to everything from there on.
          (goto-char (point-max))
          (widen)
          (forward-line 1)
          (narrow-to-region (point) (point-max))
          (when (or (null parsed)
                    (equal (car parsed) "text/plain"))
            (rfc1843-decode-region (point) (point-max))))))))
| 167 | |
;; Forward declarations of the Gnus method lists that
;; `rfc1843-gnus-setup' extends.  A value-less `defvar' only marks the
;; symbol as a special variable for the byte compiler; it does not bind
;; or change the variable.
(defvar gnus-decode-header-methods)
(defvar gnus-decode-encoded-word-methods)
| 170 | |
(defun rfc1843-gnus-setup ()
  "Setup HZ decoding for Gnus.
Load `gnus-art' and `gnus-sum', append `rfc1843-decode-article-body' to
`gnus-article-decode-hook', select the multi-method decoders for
`gnus-decode-encoded-word-function' and `gnus-decode-header-function',
and append an entry for newsgroups matching `rfc1843-newsgroups-regexp'
to `gnus-decode-encoded-word-methods' and `gnus-decode-header-methods'.

Calling this function more than once is harmless: an entry that is
already present in a method list is not added again."
  (require 'gnus-art)
  (require 'gnus-sum)
  (add-hook 'gnus-article-decode-hook 'rfc1843-decode-article-body t)
  (setq gnus-decode-encoded-word-function
        'gnus-multi-decode-encoded-word-string
        gnus-decode-header-function
        'gnus-multi-decode-header)
  ;; Build the group regexp once.  `add-to-list' compares with `equal',
  ;; so repeated calls do not pile up duplicate entries, and unlike
  ;; `nconc' it does not destructively modify the existing list.
  (let ((group-regexp (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>")))
    (add-to-list 'gnus-decode-encoded-word-methods
                 (cons group-regexp 'rfc1843-decode-string)
                 t)
    (add-to-list 'gnus-decode-header-methods
                 (cons group-regexp 'rfc1843-decode-region)
                 t)))
| 190 | |
| 191 | (provide 'rfc1843) |
| 192 | |
| 193 | ;;; rfc1843.el ends here |